字符串匹配的实质:树上或图中,点与点(或连通块与连通块)之间的一一对应关系。
换句话说,字符串匹配的本质是图论。
说在前面——
S:模板串(被查找的主串,即文本串)
T:匹配串(待查找的串,即模式串)
n:S长度
m:T长度
常见形式——
1.判断S的子序列中是否含有T(即T是否为S的子序列,不要求连续)
做法:双指针
时间复杂度:O(n)(head每轮加一,最坏情况下要扫描完整个S;循环结束后tail == m即表示T是S的子序列)
int head = 0, tail = 0, cnt = 0;
while (head < n && tail < m) {
if (S[head] == T[tail]) tail++, cnt++;
head++;
}
2.判断S的连续子串中是否含有T
做法:KMP
时间复杂度:O(n + m)
// Fills next[2..m] for the 1-indexed pattern T[1..m]: next[i] receives the
// length of the longest proper prefix of T[1..i] that is also its suffix.
// next[1] is never written here, so it must already be 0.
void get_next() {
    int matched = 0;  // length of the currently matched border
    for (int pos = 2; pos <= m; ++pos) {
        // Shrink the border until it can be extended by T[pos] or is empty.
        while (matched != 0 && T[pos] != T[matched + 1]) {
            matched = next[matched];
        }
        if (T[pos] == T[matched + 1]) {
            ++matched;
        }
        next[pos] = matched;
    }
}
// KMP search of T[1..m] inside S[1..n] (both 1-indexed), using the table
// next[] built by get_next(). Prints the 1-based start of every occurrence,
// one per line, then prints next[1..m] separated by spaces.
void solve() {
    int j = 0;  // number of pattern characters currently matched
    for (int i = 1; i <= n && j <= m; ++i) {
        // On a mismatch fall back to the next shorter border of the match.
        for (; j != 0 && S[i] != T[j + 1]; j = next[j]) {
        }
        if (S[i] == T[j + 1]) {
            ++j;
        }
        if (j != m) {
            continue;
        }
        // Full match ending at i; keep going from the longest border so
        // overlapping occurrences are reported too.
        cout << i - j + 1 << "\n";
        j = next[j];
    }
    for (int k = 1; k <= m; ++k) {
        cout << next[k] << " ";
    }
}
3.当S和T可以顺时针/逆时针旋转时,判断S的连续子串中是否存在T
做法:预处理+KMP
// Unroll each circular string (1-indexed) by appending a copy of its first
// len-1 characters after its end, so that any window wrapping past the end
// becomes a contiguous range.
// Each string is extended with its OWN length: indexing T with n (the length
// of S) would overwrite real pattern characters T[n+1..] whenever m > n.
// Requires room for 2n-1 characters in S and 2m-1 characters in T.
for (int i = 1; i <= n - 1; i++) {
    S[i + n] = S[i];
}
for (int i = 1; i <= m - 1; i++) {
    T[i + m] = T[i];
}
// Builds the KMP failure table for the 1-indexed pattern T[1..m]:
// next[i] becomes the length of the longest proper prefix of T[1..i] that is
// also a suffix of it. Only next[2..m] is written; next[1] must already be 0.
void get_next() {
    int border = 0;  // border length carried over from the previous position
    for (int idx = 2; idx <= m; ++idx) {
        // Fall back through shorter borders while T[idx] cannot extend one.
        for (; border != 0 && T[idx] != T[border + 1]; border = next[border]) {
        }
        if (T[idx] == T[border + 1]) {
            ++border;
        }
        next[idx] = border;
    }
}
void solve() {
for (int i = 1, j = 0; i <= n && j <= m; i++) {
while (j && S[i] != T[j + 1]) j = next[j];
if (S[i] == T[j + 1]) j++;
if (j == m) { cout << i - j + 1 << "\n"; j = next[j]; }
}
for (int i = 1; i <= m; i++) {
cout << next[i] << " ";
}
}
4.当S和T长度相同,字符种类数相同且都等于串长(即各自串中无重复字符),并且可以顺时针/逆时针旋转时,判断S的子串中是否存在T
做法:预处理+桶排+计数
for (int i = 1; i <= n; i++) {
cin >> x;
a[x] = i;
}
// 此时,S和T长度相同,都为n
for (int i = 1; i <= n; i++) {
cin >> x;
b[x] = i;
}
// n为字符种类数
for (int i = 0; i <= n - 1; i++) {
cnt[(b[i] - a[i] + n) % n]++;
}
int ans = 0;
for (int i = 0; i <= n - 1; i++) {
ans = max(ans, cnt[i]);
}
if (ans == n) cout << "YES\n";
else cout << "NO\n";
总结——
字符串匹配的实质:树上或图中,点与点(或连通块与连通块)之间的一一对应关系。
换句话说,字符串匹配的本质是图论。