0
点赞
收藏
分享

微信扫一扫

正则表达式匹配(自动机)

晚熟的猫 2022-03-11 阅读 121

自动机实现正则表达式匹配,https://leetcode.com/problems/regular-expression-matching/

这道题有更简单的做法,但自动机的方法更加通用。

这里用自动机配合哈希表实现 NFA 到 DFA 的转化。在编译原理(Compiler)课上看到很多自动生成自动机的工具,所以自己写了一个简单的版本。

#include <bits/stdc++.h>
using namespace std;
// Regular-expression matcher: AutoCons compiles a pattern into a DFA stored in
// the tables below, and mMatch runs an input string through that DFA.
// Author's stated scope: patterns made of letters and '*' (no consecutive '*')
// with length <= 30. AutoCons additionally treats '.' as "any of the 26 letters".
const static int bign = 33;           // capacity of the per-pattern-position arrays (mark, valchar, nxtState, vst's 2nd dimension)
const static int hashNum = 100003;    // number of buckets in hashList; also the modulus used by hashCode
const static int charNum = 26;        // alphabet size; second dimension of stateAuto
const static int maxStateNum = 1033;  // capacity of the DFA state tables (stateAuto, mq, isend)
int mark[bign];                       // mark[i] == 1 when pattern element i is followed by '*'
char valchar[bign];                   // valchar[i] is the character of pattern element i ('*' characters are not stored)
int n = 0;                            // number of pattern elements stored in valchar
int stateNum = 1;                     // id of the most recently created DFA state; ids start at 1

// Set by AutoCons: smallest position such that every pattern element from it
// onward is starred. DFA states are flagged accepting by comparing against it.
int firstend;

int stateAuto[maxStateNum][charNum];      // DFA transitions: stateAuto[state][letter] -> next state id, 0 = no transition
int mq[maxStateNum];                      // array-backed FIFO queue of DFA state ids still to be expanded
int mfront, mtail;                        // read / write cursors into mq
vector<int> hashList[hashNum];            // hashList[h] lists the DFA state ids whose position set hashes to h
vector<vector<int>> stateToIndex;         // stateToIndex[id - 1] is the set of NFA positions represented by DFA state id
vector<pair<int, int>> nxtState[bign];    // nxtState[i] holds (letter index, target position) moves out of NFA position i
bool isend[maxStateNum];                  // isend[id] is true when DFA state id is accepting
int vst[26][bign];                        // stamp table indexed [letter][position], used to skip duplicate (letter, position) pairs


inline int hashCode(vector<int> &vec)
{
	int ret = 0;
	for (int v : vec)
		ret = (ret * 31 + v) % hashNum;
	return ret;
}

/**
 * Element-wise equality of two int vectors.
 *
 * Both arguments are only read, so they are taken by const reference (const
 * vectors and temporaries are accepted). The comparison itself is delegated
 * to std::vector's operator==, which checks the sizes and then every element.
 *
 * @param vec1 first sequence.
 * @param vec2 second sequence.
 * @return true when both have the same length and the same elements in the
 *         same order.
 */
inline bool equalVec(const std::vector<int>& vec1, const std::vector<int>& vec2)
{
	return vec1 == vec2;
}

void AutoCons(const char* RegExp)
{
	stateNum = 1;
	int slen = strlen(RegExp);
	
	for (int i = 0; i < slen; i++)
	{
		if (RegExp[i] == '*')
		{
			mark[n - 1] = 1;
		}
		else
		{
			valchar[n++] = RegExp[i];
		}
	}
	firstend = n;
	for (int i = n - 1; i >= 0; i--)
	{
		if (mark[i] == 0)
		{
			//firstend = i + 1;
			break;
		}
		else
		{
			firstend--;
		}
	}
	for (int i = n - 1; i >= 0; i--)
	{
		if (valchar[i] == '.')
		{
			for (int j = 0; j < 26; j++)
			{
				if (1 == mark[i])
				{
					nxtState[i].push_back(make_pair(j, i));
					vst[j][i] = i + 1;
				}
				nxtState[i].push_back(make_pair(j, i + 1));
				vst[j][i + 1] = i + 1;
			}
		}
		else
		{
			if (1 == mark[i])
			{
				vst[valchar[i] - 'a'][i] = i + 1;
				nxtState[i].push_back(make_pair(valchar[i] - 'a', i));
			}
			vst[valchar[i] - 'a'][i + 1] = i + 1;
			nxtState[i].push_back(make_pair(valchar[i] - 'a', i + 1));
		}
		if (1 == mark[i])
		{
			for (auto it : nxtState[i + 1])
			{
				if(vst[it.first][it.second] != i + 1)
					nxtState[i].push_back(it);
			}
		}
		//sort(nxtState[i].begin(), nxtState[i].end());
		//unique(nxtState[i].begin(), nxtState[i].end());
	}
	vector<int> state1(1, 0);
	if (firstend == 0)
		isend[1] = true;
	for (int i = 0; i < n; i++)
	{
		if (1 == mark[i])
		{
			state1.push_back(i + 1);
			if(firstend <= i + 1)
				isend[1] = true;
		}
		else
			break;
	}
	stateToIndex.push_back(state1);
	hashList[hashCode(state1)].push_back(1);

	mq[mtail++] = stateNum;
	memset(vst, 0, sizeof(vst));
	while (mfront < mtail)
	{
		int u = mq[mfront++];
		//u--;
		vector<int> tvec[26];
		for(int i = 0; i < 26; i++)
			tvec[i].clear();
		
		for (int i = 0; i < stateToIndex[u - 1].size(); i++)
		{
			for (pair<int, int> tautomata : nxtState[stateToIndex[u - 1][i]])
			{
				int x = tautomata.first;
				int y = tautomata.second;
				if (vst[x][y] != u)
					vst[x][y] = u;
				else
					continue;
				tvec[x].push_back(y);
				//if (tautomata.second >= firstend)
				//	endflag = true;
			}
		}
		
		for (int i = 0; i < 26; i++)
		{
			if (tvec[i].empty())
				continue;
		//	sort(tvec[i].begin(), tvec[i].end());
		//	unique(tvec[i].begin(), tvec[i].end());
			int hashnum = hashCode(tvec[i]);
			bool flag = false;
			for (int stateid : hashList[hashnum])
			{
				if (equalVec(stateToIndex[stateid - 1], tvec[i]))
				{
					flag = true;
					stateAuto[u][i] = stateid;
					break;
				}
			}
			if (!flag)
			{
				stateNum++;
				stateAuto[u][i] = stateNum;
				hashList[hashnum].push_back(stateNum);
				stateToIndex.push_back(tvec[i]);
				mq[mtail++] = stateNum;
				if (tvec[i][tvec[i].size() - 1] >= firstend)
					isend[stateNum] = true;
			}
		}
	}
	cout << "stateNum of DFA = " << stateNum << endl;
}

bool mMatch(const char* matchstr)
{
	int slen = strlen(matchstr);
	int nowstate = 1;
	for (int i = 0; i < slen; i++)
	{
		int tc = matchstr[i] - 'a';
		nowstate = stateAuto[nowstate][tc];
		if (0 == nowstate)
		{
			return false;
		}
		//if (isend[nowstate])
		//	return true;
	}
	return isend[nowstate];
}

/**
 * Demo driver: compiles the pattern ".*a*a" into a DFA, matches the text
 * "bbbba" against it and prints the outcome.
 */
int main()
{
	const string pattern = ".*a*a";
	const string text = "bbbba";

	AutoCons(pattern.c_str());

	const bool matched = mMatch(text.c_str());
	cout << (matched ? "matching successfully" : "matching failed") << endl;
}
举报

相关推荐

0 条评论