[NOIP2020]T2字符串匹配

题外话：在考场上写了 $O(n \ln n \log 26)$ 的蠢做法，据说用暴力（$O(26n + n \ln n)$）替代树状数组可以拿 92 分的好成绩。

这里介绍两种 $O(n \log 26)$ 的做法。

首先经思考容易想到先预处理出后缀 $C$ 的奇数次字符数量，然后枚举 $AB$ 的大小（$AB$ 能否继续重复用哈希判断一下就行）。

因为 $B$ 只要至少有一个字符就满足条件，而 $A$ 又是前缀，所以就直接维护 $A$ 中的奇数次字符数量，最多有 26 个，用树状数组维护就行，复杂度：$O(n \ln n \log 26)$（由于查询是一个前缀，所以在更新 $A$ 时直接暴力修改前缀数组就可以去掉 $\log 26$）。~~讲道理数据稍微随机一点都能跑过去~~

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;

// Array capacity and the two multipliers of the double polynomial hash.
const ll N=1100000,base1=1021831,base2=96269;

// hash1/hash2 hold the prefix hashes and pow1/pow2 the matching base powers;
// main() fills them for the 1-indexed input string. All arithmetic is
// unsigned 64-bit and therefore wraps modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];

// Hash of the 1-indexed substring [l, r] under base1. Reads hash1[l-1],
// so callers must pass l >= 1.
ull ff1(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash1[l-1]*pow1[len];
    return hash1[r]-shifted;
}

// Hash of the 1-indexed substring [l, r] under base2. Reads hash2[l-1],
// so callers must pass l >= 1.
ull ff2(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash2[l-1]*pow2[len];
    return hash2[r]-shifted;
}

// Both substring hashes bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    pair<ull,ull> key;
    key.first=ff1(l,r);
    key.second=ff2(l,r);
    return key;
}
// Fenwick tree (binary indexed tree) over the 1-based positions 1..n.
// The backing array has 30 slots, so at most positions 1..29 are usable.
struct BIT
{
    // sum[]: tree nodes (slot 0 is unused); n: highest usable position, set by the caller.
    long long sum[30],n;

    // Zeroes every node; n is left untouched.
    void clear() {memset(sum,0,sizeof(sum));}

    // Lowest set bit of x (0 when x == 0).
    inline long long lowbit(long long x) {return x&(-x);}

    // Adds v at position x. Positions outside 1..min(n,29) are ignored:
    // x == 0 would never advance (lowbit(0) == 0) and a negative or too large
    // x would index outside sum[].
    void add(long long x,long long v)
    {
        if(x<1) return;
        const long long limit=n<29?n:29;
        for(;x<=limit;x+=lowbit(x)) sum[x]+=v;
    }

    // Returns the total stored at positions 1..x. x is clamped to
    // [0, min(n,29)], so a query beyond the last position yields the grand
    // total and a non-positive x yields 0.
    long long find(long long x)
    {
        const long long limit=n<29?n:29;
        if(x>limit) x=limit;
        long long ans=0;
        for(;x>0;x-=lowbit(x)) ans+=sum[x];
        return ans;
    }
}T;
ll n,ans,tot[30],cnt,_tot[N];
char ch[N];
int main()
{
    ll Q,i,j;
    pair<ull,ull> tmp;
    scanf("%lld",&Q);
    T.n=26;
    while(Q--)
    {
        T.clear();
        memset(tot,0,sizeof(tot));
        memset(_tot,0,sizeof(_tot));
        cnt=ans=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++) 
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }
        for(i=n;i>=3;i--) 
        {
            tot[ch[i]-'a']^=1;
            if(tot[ch[i]-'a']==0) cnt--;
            else cnt++;
            _tot[i]=cnt;
        }
        memset(tot,0,sizeof(tot));
        cnt=0;
        for(i=2;i<n;i++)
        {
            tot[ch[i-1]-'a']^=1;
            if(tot[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            tmp=ff(1,i);
            for(j=1;i*j<n;j++)
            {   
                if(ff(i*j-i+1,i*j)!=tmp) break;
                ans+=T.find(_tot[i*j+1]+1);
            } 
        }
        printf("%lld
",ans);
    }
    return 0;
}

刚才的做法瓶颈在于要枚举 $AB$ 的循环次数。

从数据范围里还没有用到的特殊性质（只有一种字符、只有两种字符）分析一下，可以发现：在 $AB$ 的大小确定后，$(AB)^i$ 的奇数次字符数量随 $i$ 是循环的（周期为 2）。

所以 $S_1=AB$ 与 $S_2=ABABAB$ 的奇数次字符数量是一样的！而且这样的话 $S_1$ 与 $S_2$ 对应的 $C$ 的奇数次字符数量也是一样的！换言之，当我们知道 $AB$ 的循环次数时，就可以 $O(1)$ 求出答案。

（$AB$ 循环最多的那种并不需要单独计算，因为 $AB$ 中出现偶数次的没有影响，而奇数次的字符变成 0 可以当偶数来看）

下面介绍两种预处理次数的方法

法一：

令 $num[x]$ 为 $[1,x]$ 作为 $AB$ 时的最多重复次数。

若 $[1,x]$ 与 $[x+1,2x]$ 相同，则 $num[x]$ 至少为 $num[2x] \times 2$，且不超过这个数 $+1$。只需再判断一下 $[1,x]$ 与 $[num[2x] \cdot 2x+1,\ num[2x] \cdot 2x+x]$ 是否相同就行。

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;

// Array capacity and the two multipliers of the double polynomial hash.
const ll N=1100000,base1=1021831,base2=96269;

// hash1/hash2 hold the prefix hashes and pow1/pow2 the matching base powers;
// main() fills them for the 1-indexed input string. All arithmetic is
// unsigned 64-bit and therefore wraps modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];

// Hash of the 1-indexed substring [l, r] under base1. Reads hash1[l-1],
// so callers must pass l >= 1.
ull ff1(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash1[l-1]*pow1[len];
    return hash1[r]-shifted;
}

// Hash of the 1-indexed substring [l, r] under base2. Reads hash2[l-1],
// so callers must pass l >= 1.
ull ff2(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash2[l-1]*pow2[len];
    return hash2[r]-shifted;
}

// Both substring hashes bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    pair<ull,ull> key;
    key.first=ff1(l,r);
    key.second=ff2(l,r);
    return key;
}
// Fenwick tree (binary indexed tree) over the 1-based positions 1..n.
// The backing array has 30 slots, so at most positions 1..29 are usable.
struct BIT
{
    // sum[]: tree nodes (slot 0 is unused); n: highest usable position, set by the caller.
    long long sum[30],n;

    // Zeroes every node; n is left untouched.
    void clear() {memset(sum,0,sizeof(sum));}

    // Lowest set bit of x (0 when x == 0).
    inline long long lowbit(long long x) {return x&(-x);}

    // Adds v at position x. Positions outside 1..min(n,29) are ignored:
    // x == 0 would never advance (lowbit(0) == 0) and a negative or too large
    // x would index outside sum[].
    void add(long long x,long long v)
    {
        if(x<1) return;
        const long long limit=n<29?n:29;
        for(;x<=limit;x+=lowbit(x)) sum[x]+=v;
    }

    // Returns the total stored at positions 1..x. x is clamped to
    // [0, min(n,29)], so a query beyond the last position yields the grand
    // total and a non-positive x yields 0.
    long long find(long long x)
    {
        const long long limit=n<29?n:29;
        if(x>limit) x=limit;
        long long ans=0;
        for(;x>0;x-=lowbit(x)) ans+=sum[x];
        return ans;
    }
}T;
ll n,ans,book[30],cnt,tot[N],num[N];
char ch[N];
int main()
{
    ll Q,i;
    scanf("%lld",&Q);
    T.n=26;
    while(Q--)
    {
        T.clear();
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        cnt=ans=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++) 
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }
        for(i=n;i>=3;i--) 
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }
        num[n-1]=1;
        for(i=n-2;i>=2;i--)
        {
        	if(i*2>n-1 || ff(1,i)!=ff(i+1,i*2)) {num[i]=1;continue;}
        	num[i]=num[i<<1]<<1;
        	if(num[i]*i+i<=n-1 && ff(1,i)==ff(num[i]*i+1,num[i]*i+i)) num[i]++;
		}
		cnt=0;
		memset(book,0,sizeof(book));
        for(i=2;i<n;i++)
        {
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            ans+=T.find(tot[i+1]+1)*((num[i]>>1)+(num[i]&1));
            ans+=T.find(tot[i*2+1]+1)*(num[i]>>1);
        }
        printf("%lld
",ans);
    }
    return 0;
}

法二：

众所周知，用 kmp 算法预处理后可以 $O(1)$ 求出字符串的最小循环周期 $= len-kmp[len]$（$kmp[len]$ 为失配位置）。因此我们可以二分出 $AB$ 的循环次数，二分的复杂度为

$$\sum_{i=1}^{n}\log\frac{n}{i}=n\log n-\sum_{i=1}^{n}\log i$$

乍一眼感觉这个东西不太行，但是稍微拆一下

$$\sum_{i=1}^{n}\log i \thickapprox n+(n-2^0)+(n-2^1)+\cdots+(n-2^{\log_{2}{n}}) \thickapprox n\log n-n$$

所以二分实际上复杂度只有 $O(n)$。

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;
const ll N=1100000;
// Fenwick tree (binary indexed tree) over the 1-based positions 1..n.
// The backing array has 30 slots, so at most positions 1..29 are usable.
struct BIT
{
    // sum[]: tree nodes (slot 0 is unused); n: highest usable position, set by the caller.
    long long sum[30],n;

    // Zeroes every node; n is left untouched.
    void clear() {memset(sum,0,sizeof(sum));}

    // Lowest set bit of x (0 when x == 0).
    inline long long lowbit(long long x) {return x&(-x);}

    // Adds v at position x. Positions outside 1..min(n,29) are ignored:
    // x == 0 would never advance (lowbit(0) == 0) and a negative or too large
    // x would index outside sum[].
    void add(long long x,long long v)
    {
        if(x<1) return;
        const long long limit=n<29?n:29;
        for(;x<=limit;x+=lowbit(x)) sum[x]+=v;
    }

    // Returns the total stored at positions 1..x. x is clamped to
    // [0, min(n,29)], so a query beyond the last position yields the grand
    // total and a non-positive x yields 0.
    long long find(long long x)
    {
        const long long limit=n<29?n:29;
        if(x>limit) x=limit;
        long long ans=0;
        for(;x>0;x-=lowbit(x)) ans+=sum[x];
        return ans;
    }
}T;
ll n,ans,book[30],cnt,tot[N],num[N],kmp[N],k;
char ch[N];
int main()
{
    ll Q,i,l,r,mid,res,len;
    scanf("%lld",&Q);
    T.n=26;
    while(Q--)
    {
        T.clear();
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        memset(kmp,0,sizeof(kmp));
        cnt=ans=0;
        scanf("%s",ch);
        n=strlen(ch);
        for(i=n-1;i>=2;i--) 
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }
        kmp[0]=kmp[1]=0;k=0;
        for(i=1;i<n;i++)
        {
        	while(k && ch[i]!=ch[k]) k=kmp[k];
        	if(ch[i]==ch[k]) k++;
        	kmp[i+1]=k;
		}
		for(i=1;i<n;i++)
		{
			l=2,r=(n-1)/(i+1),res=1;
			while(l<=r)
			{
				mid=l+r>>1;
				len=(i+1)*mid;
				if((len-1-kmp[len-1])!=0 && (i+1)%(len-kmp[len])==0) res=mid,l=mid+1;
				else r=mid-1;
			}
			num[i]=res;
		}
		cnt=0;
		memset(book,0,sizeof(book));
        for(i=1;i<n-1;i++)
        {
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            len=i+1;
            ans+=T.find(tot[len]+1)*((num[i]>>1)+(num[i]&1));
            ans+=T.find(tot[len*2]+1)*(num[i]>>1);
        }
        printf("%lld
",ans);
    }
    return 0;
}

后记：

有个憨憨跑到我跟前说这题可以做到 $O(n)$，我愣了一下，寻思着这 $\log 26$ 貌似也不大啊，是不是我听错了？“因为你发现这次与上次的 $C$ 的奇数次字符数量仅仅差 1，所以···”，我无语。~~出题人卡这个他人就没了~~

$O(n)$：

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;

// Array capacity and the two multipliers of the double polynomial hash.
const ll N=1100000,base1=1021831,base2=96269;

// hash1/hash2 hold the prefix hashes and pow1/pow2 the matching base powers;
// main() fills them for the 1-indexed input string. All arithmetic is
// unsigned 64-bit and therefore wraps modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];

// Hash of the 1-indexed substring [l, r] under base1. Reads hash1[l-1],
// so callers must pass l >= 1.
ull ff1(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash1[l-1]*pow1[len];
    return hash1[r]-shifted;
}

// Hash of the 1-indexed substring [l, r] under base2. Reads hash2[l-1],
// so callers must pass l >= 1.
ull ff2(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted=hash2[l-1]*pow2[len];
    return hash2[r]-shifted;
}

// Both substring hashes bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    pair<ull,ull> key;
    key.first=ff1(l,r);
    key.second=ff2(l,r);
    return key;
}
// Per-test-case state (the string is 1-indexed in ch[1..n]).
//   book[c] : parity (0/1) of letter c in the range currently being scanned
//   cnt     : number of letters whose parity in book[] is odd
//   tot[i]  : number of letters occurring an odd number of times in ch[i..n];
//             sized 2*N because indices up to 2n-1 are read (entries past n stay 0)
//   num[i]  : largest k such that ch[1..i] repeated k times fits in ch[1..n-1]
//   sum[v]  : number of inserted prefixes whose odd-letter count equals v
//   ans     : answer accumulated for the current string
ll n,ans,book[30],cnt,tot[N*2],num[N],sum[30];
char ch[N];

// Reads Q strings and prints one answer per string. The two prefix-count
// queries per length i are maintained incrementally in last1/last2 instead of
// being answered by a tree, since consecutive thresholds differ by a bounded
// amount.
// Assumes the input consists of lowercase letters 'a'..'z' (indices ch-'a').
int main()
{
    ll Q,i,last1,last2;
    scanf("%lld",&Q);
    while(Q--)
    {
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        memset(sum,0,sizeof(sum));
        cnt=ans=last1=last2=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);

        // Prefix hashes and base powers used by ff() for substring comparison.
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++)
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }

        // Suffix scan down to i = 2, because tot[2] is the starting threshold
        // of last1 below.
        for(i=n;i>=2;i--)
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }

        // num[i] from num[2i]: if the first two copies match, the count is
        // 2*num[2i], plus one more if a further copy still fits and matches.
        num[n-1]=1;
        for(i=n-2;i>=2;i--)
        {
            if(i*2>n-1 || ff(1,i)!=ff(i+1,i*2)) {num[i]=1;continue;}
            num[i]=num[i<<1]<<1;
            if(num[i]*i+i<=n-1 && ff(1,i)==ff(num[i]*i+1,num[i]*i+i)) num[i]++;
        }

        cnt=0;
        memset(book,0,sizeof(book));
        // Invariants at the start of iteration i (|AB| = i), over the prefixes
        // of length 1..i-2 inserted so far:
        //   last1 = how many have odd-letter count <= tot[i]
        //   last2 = how many have odd-letter count <= tot[2(i-1)+1]
        for(i=2;i<n;i++)
        {
            // Insert the prefix ch[1..i-1] and account for it under the old thresholds.
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            ++sum[cnt];
            if(cnt<=tot[i]) last1++;
            if(cnt<=tot[(i-1)*2+1]) last2++;

            // Move last1's threshold from tot[i] to tot[i+1]; the two suffixes
            // differ by one character, so the threshold changes by exactly 1.
            if(tot[i]<tot[i+1]) last1+=sum[tot[i+1]];
            else last1-=sum[tot[i]];
            ans+=last1*((num[i]>>1)+(num[i]&1));

            // Move last2's threshold from tot[2i-1] to tot[2i+1]; inside the
            // string the two suffixes differ by two characters, so the change
            // is -2, 0 or +2. Once 2i+1 > n the value is no longer maintained
            // correctly, but num[i]>>1 is 0 from then on, so it is not used.
            if(tot[(i-1)*2+1]<tot[i*2+1]) last2+=sum[tot[i*2+1]]+sum[tot[i*2+1]-1];
            else if(tot[(i-1)*2+1]>tot[i*2+1]) last2-=sum[tot[i*2+1]+1]+sum[tot[i*2+1]+2];
            ans+=last2*(num[i]>>1);
        }
        // The "\n" escape replaces a raw line break inside the literal,
        // which does not compile.
        printf("%lld\n",ans);
    }
    return 0;
}

跪求各位老爷一键三连~~点赞也行~~

相关阅读:
SVM理论之最优超平面
 回归系列之L1和L2正则化
 Logistic回归明明称呼为回归但为什么是分类算法？
LTE无线接入三层协议体系结构
 80 道大厂算法高频面试题
 linux内核调试技术之printk
计算机视觉岗常见面试题
 偏差(Bias)和方差(Variance)——机器学习中的模型选择
 正态分布x/y轴
 Python time strftime()方法
原文地址：https://www.cnblogs.com/lhc-yyl-lyx-lyh/p/14170674.html