两篇论文:许智磊后缀数组.pdf 后缀数组——处理字符串的有力工具.pdf
贴两模版:
DA:
/* *后缀数组,倍增算法实现,复杂度O(nlogn) *sa[i]: 第i小的后缀是在字符串位置,即后缀sa[i] *rank[i]: 后追i在sa数组下标,即第rank[i]小 *height[i]: LCP (suffix (sa[i-1], sa[i])) */ int sa[N], rank[N], height[N]; int ws[N], wa[N], wb[N]; bool cmp(int *r, int a, int b, int l) { return (r[a] == r[b] && r[a+l] == r[b+l]); } //r数组为读入的字符串,m = max (r[i]) + 1,一般字符128足够了 //n为strlen (s) + 1,加上最后一个' ' void DA(char *r, int n, int m = 128) { int i, j, p, *x = wa, *y = wb; for (i=0; i<m; ++i) ws[i] = 0; for (i=0; i<n; ++i) ws[x[i]=r[i]]++; for (i=1; i<m; ++i) ws[i] += ws[i-1]; for (i=n-1; i>=0; --i) sa[--ws[x[i]]] = i; for (j=1, p=1; p<n; j<<=1, m=p) { for (p=0, i=n-j; i<n; ++i) y[p++] = i; for (i=0; i<n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j; for (i=0; i<m; ++i) ws[i] = 0; for (i=0; i<n; ++i) ws[x[y[i]]]++; for (i=1; i<m; ++i) ws[i] += ws[i-1]; for (i=n-1; i>=0; --i) sa[--ws[x[y[i]]]] = y[i]; std::swap (x, y); for (p = 1, x[sa[0]] = 0, i=1; i<n; ++i) { x[sa[i]] = cmp (y, sa[i-1], sa[i], j) ? p - 1 : p++; } } } void calc_height(char *r, int *sa, int n) { int i, j, k = 0; for (i=1; i<=n; ++i) rank[sa[i]] = i; //i: 第i小的后缀 sa[0] = n(s[n]=' ') for (i=0; i<n; ++i) { //i: 后缀i if (k) k--; j = sa[rank[i]-1]; while (r[i+k] == r[j+k]) k++; height[rank[i]] = k; //其实并没有计算height[n] } } /* *LCP (suffix (i), suffix (j)) = min (height[l] to height[r]); //RMQ *l = rank[i], r = rank[j]; if (l > r) swap (l, r); l++; */
DC3:
/* *后缀数组,DC3算法实现,复杂度O(n) */ int wa[N],wb[N],wv[N],ws[N]; int rank[N],height[N]; int sa[N],r[N]; int c0(int *y,int a,int b) { return y[a]==y[b]&&y[a+1]==y[b+1]&&y[a+2]==y[b+2]; } int c12(int k,int *y,int a,int b) { if(k==2) return y[a]<y[b]||y[a]==y[b]&&c12(1,y,a+1,b+1); else return y[a]<y[b]||y[a]==y[b]&&wv[a+1]<wv[b+1]; } void sort(int *r,int *a,int *b,int n,int m) { int i; for(i=0;i<n;i++) wv[i]=r[a[i]]; for(i=0;i<m;i++) ws[i]=0; for(i=0;i<n;i++) ws[wv[i]]++; for(i=1;i<m;i++) ws[i]+=ws[i-1]; for(i=n-1;i>=0;i--) b[--ws[wv[i]]]=a[i]; } void DC3(int *r,int *sa,int n,int m) { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i; sort(r+2,wa,wb,tbc,m); sort(r+1,wb,wa,tbc,m); sort(r,wa,wb,tbc,m); for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++; if(p<tbc) dc3(rn,san,tbc,p); else for(i=0;i<tbc;i++) san[rn[i]]=i; for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3; if(n%3==1) wb[ta++]=n-1; sort(r,wb,wa,ta,m); for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i; for(i=0,j=0,p=0;i<ta && j<tbc;p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++]; for(;i<ta;p++) sa[p]=wa[i++]; for(;j<tbc;p++) sa[p]=wb[j++]; } void calc_height(int *r,int *sa,int n) { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); }