POJ 3415 Common Substrings(字尾陣列求重複字串)

畫船聽雨發表於2015-01-27

題目大意:給你兩個字串A、B和一個整數K,求有多少個三元組(i, j, k)滿足k不小於K,且A從位置i開始、長度為k的子串與B從位置j開始、長度為k的子串相同(位置不同的算作不同的公共子串)。

解題思路:

先說論文上給的解釋:基本思路是計算A的所有字尾和B的所有字尾之間的最長公共字首的長度,把最長公共字首長度不小於k的部分全部加起來。先將兩個字串連起來,中間用一個沒有出現過的字元隔開。按height值分組後,接下來的工作便是快速的統計每組中字尾之間的最長公共字首之和。掃描一遍,每遇到一個B的字尾就統計與前面的A的字尾能產生多少個長度不小於k的公共子串,這裡A的字尾需要用一個單調的棧來高效的維護。然後對A也這樣做一次。具體的細節留給讀者思考。
給的解釋很模糊,我們自己來補充一下。首先根據LCP定理,排名為i和j的兩個字尾的最長公共字首 LCP(i, j) = min{height[t] | i+1 <= t <= j},其中height[t]即LCP(t-1, t);所以只要中間有一個height值小於k,它兩側的字尾之間就不可能有長度不小於k的公共字首,於是可以在height小於k的位置把排好序的字尾斷開,這就是分組。分組的同時還要用到單調棧,使得每組裡面都維護一個單調遞增的序列,這樣算起來就比較簡單省事了。注意要去掉重複的內容,已經加過的不要再重複加。

棧中的每一個元素擁有兩個屬性:第一個是它的height值,第二個是有多少個字尾被合併到這個值上(如果新加入的height值不大於棧頂的值,就把棧頂元素彈出,將它的個數併入新元素並視為同一高度,直到棧頂的值比新值小為止)。需要對height陣列掃描兩次:一次統計排在B的字尾之前的A的字尾,一次統計排在A的字尾之前的B的字尾。

Common Substrings
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 7686   Accepted: 2549

Description

A substring of a string T is defined as:

$T(i, k) = T_i T_{i+1} \ldots T_{i+k-1}$, $1 \le i \le i+k-1 \le |T|$.

Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

$S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}$.

You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

$1 \le |A|, |B| \le 10^5$
$1 \le K \le \min\{|A|, |B|\}$
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2
aababaa
abaabaa
1
xx
xx
0

Sample Output

22
5

Source

#include <algorithm>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <iomanip>
#include <stdio.h>
#include <string>
#include <queue>
#include <cmath>
#include <stack>
#include <ctime>
#include <map>
#include <set>
#define eps 1e-9
///#define M 1000100
///#define LL __int64
#define LL long long
///#define INF 0x7ffffff
#define INF 0x3f3f3f3f
#define PI 3.1415926535898
#define zero(x) ((fabs(x)<eps)?0:x)
#define mod 1000000007
#define Read() freopen("autocomplete.in","r",stdin)
#define Write() freopen("autocomplete.out","w",stdout)
#define Cin() ios::sync_with_stdio(false)

using namespace std;



// Reads the next decimal integer from stdin.
//
// Every character that is neither a digit nor '-' is skipped first. A leading
// '-' negates the digits that follow it; a '-' with no digits after it yields 0.
// The single character that terminates the digit run is consumed as well.
//
// Returns 0 when end of input is reached before a digit or '-' is seen
// (comparing a plain `char` against digits can never detect EOF, so the
// character is kept in an `int` and EOF is checked explicitly).
// No overflow checking is performed on the accumulated value.
inline int read()
{
    int ch = getchar();
    while(ch != EOF && ch != '-' && (ch < '0' || ch > '9'))
    {
        ch = getchar();
    }
    if(ch == EOF)
    {
        return 0;
    }

    const bool negative = (ch == '-');
    int a = negative ? 0 : ch - '0';
    // EOF is negative, so it also ends this loop.
    while(((ch = getchar()) >= '0') && (ch <= '9'))
    {
        a = a * 10 + (ch - '0');
    }
    return negative ? -a : a;
}
// Writes the decimal representation of `a` to stdout with putchar,
// preceded by '-' when `a` is negative. No newline is appended.
//
// The magnitude is computed in unsigned arithmetic: negating the most
// negative int as a signed value is undefined behaviour, whereas unsigned
// negation wraps to the correct magnitude.
void write(int a)
{
    unsigned int magnitude = static_cast<unsigned int>(a);
    if(a < 0)
    {
        putchar('-');
        magnitude = 0u - magnitude;
    }

    // Digits are produced least-significant first, so buffer and emit in reverse.
    char digits[16];
    int count = 0;
    do
    {
        digits[count++] = static_cast<char>('0' + magnitude % 10u);
        magnitude /= 10u;
    }
    while(magnitude != 0u);

    while(count > 0)
    {
        putchar(digits[--count]);
    }
}

// Capacity shared by every working array in this file.
const int maxn = 200010;

// Scratch buffers used by da(): wa and wb alternate between holding the
// current ranks and the order by second key, wv holds the sort keys of one
// round, ws1 is the counting-sort bucket array.
int wa[maxn];
int wb[maxn];
int wv[maxn];
int ws1[maxn];

// Suffix array: filled by da(), read by calheight() and Del().
int sa[maxn];

// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]), otherwise 0.
// The second key is only read when the first keys already match.
int cmp(int *r, int a, int b, int l)
{
    if(r[a] != r[b])
    {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}


void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for(i = 0; i < m; i++) ws1[i] = 0;
    for(i = 0; i < n; i++) ws1[x[i] = r[i]]++;
    for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
    for(i = n-1; i >= 0; i--) sa[--ws1[x[i]]] = i;
    for(j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        for(p = 0, i = n-j; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++)
            if(sa[i] >= j) y[p++] = sa[i]-j;
        for(i = 0; i < n; i++) wv[i] = x[y[i]];
        for(i = 0; i < m; i++) ws1[i] = 0;
        for(i = 0; i < n; i++) ws1[wv[i]]++;
        for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
        for(i = n-1; i >= 0; i--) sa[--ws1[wv[i]]] = y[i];
        for(swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
    }
    return ;
}

int rank[maxn], height[maxn];

void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for(i = 1; i <= n; i++) rank[sa[i]] = i;
    for(i = 0; i < n; height[rank[i++]] = k)
        for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++);
    return;
}

// Input buffers for the two strings read in main().
char str1[maxn];
char str2[maxn];

// Integer sequence handed to da() and calheight(); assembled in main().
int seq[maxn];

// Stack storage for Del(): [x][0] is a height value, [x][1] a count.
int sta[maxn][2];

void Del(int n, int len1, int len2, int k)
{
    LL sum = 0;
    int top = 0;
    LL ans = 0;
    for(int i = 1; i <= n; i++)
    {
        if(height[i] < k)
        {
            ans = 0;
            top = 0;
            continue;
        }
        int cnt = 0;
        if(sa[i-1] < len1)
        {
            cnt++;
            ans += (height[i]-k+1);
        }
        while(top && sta[top-1][0] >= height[i])
        {
            top--;
            ans -= sta[top][1]*(sta[top][0]-height[i]);
            cnt += sta[top][1];
        }
        sta[top][0] = height[i];
        sta[top++][1] = cnt;
        if(sa[i] > len1) sum += ans;
    }


    top = 0;
    ans = 0;
    for(int i = 1; i <= n; i++)
    {
        if(height[i] < k)
        {
            ans = 0;
            top = 0;
            continue;
        }
        int cnt = 0;
        if(sa[i-1] > len1)
        {
            cnt++;
            ans += (height[i]-k+1);
        }
        while(top && sta[top-1][0] >= height[i])
        {
            top--;
            ans -= sta[top][1]*(sta[top][0]-height[i]);
            cnt += sta[top][1];
        }
        sta[top][0] = height[i];
        sta[top++][1] = cnt;
        if(sa[i] < len1) sum += ans;
    }
    cout<<sum<<endl;
}


int main()
{
    int k;
    while(~scanf("%d", &k) && k)
    {
        scanf("%s %s",str1, str2);
        int len1 = strlen(str1);
        int len2 = strlen(str2);


        int ans = 0;
        for(int i = 0; i < len1; i++)
            seq[ans++] = str1[i];
        seq[ans++] = 1;
        for(int i = 0; i < len2; i++)
            seq[ans++] = str2[i];
        seq[ans] = 0;
        da(seq, sa, ans+1, 130);
        calheight(seq, sa, ans);
        Del(ans, len1, len2, k);
    }
    return 0;
}



相關文章