• 字符串哈希专题



    layout: post
    title: 字符串哈希专题
    author: "luowentaoaa"
    catalog: true
    tags:
    mathjax: true
    - 字符串


    传送门

    摘要 哈希进制转换

    题意

    给定一个字符串,考虑其中所有长度为N的子串,且字符串中不同的字符不会超过NC个。问总共有多少个不同的子串

    思路

    以nc作为进制,把一个子串化为这个进制下的数,再用哈希判断

    #include<cstdio>
    #include<iostream>
    #include<cstring>
    #include<string>
    #include<set>
    #include<vector>
    using namespace std;
    typedef long long ll;
    typedef unsigned long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e7+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // Candidate bases and moduli for the polynomial hash; a Hash selects one of
    // each by index.
    const ull Seed_Pool[]={146527,19260817};
    const ull Mod_Pool[]={1000000009,998244353};
    // Prefix polynomial hash of a C string, computed modulo MOD with base SEED.
    //   p[i] = SEED^i mod MOD
    //   h[i] = hash of the first i characters (h[0] = 0)
    struct Hash{
        ull SEED,MOD;
        vector<ull>p,h;
        // Default construction picks the first seed/modulus so SEED and MOD are
        // never left indeterminate; the tables stay empty.
        Hash():SEED(Seed_Pool[0]),MOD(Mod_Pool[0]){}
        // Builds both tables for s using Seed_Pool[seed_index] and
        // Mod_Pool[mod_index]. The indices are not range-checked.
        Hash(const char* s,const int& seed_index,const int& mod_index){
            SEED=Seed_Pool[seed_index];
            MOD=Mod_Pool[mod_index];
            int n=strlen(s);
            p.resize(n+1),h.resize(n+1);
            p[0]=1;
            for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
            // Read each byte as unsigned char. A negative (signed) char would be
            // converted to a huge ull, wrap around 2^64 before the "% MOD", and
            // break the identity that get() relies on.
            for(int i=1;i<=n;i++){
                const ull byte=static_cast<unsigned char>(s[i-1]);
                h[i]=(h[i-1]*SEED%MOD+byte)%MOD;
            }
        }
        // Hash of the half-open character range [l,r) (0-based).
        // MOD is added before the final reduction to keep the difference non-negative.
        ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
        // Hash of the m characters starting at 0-based offset l.
        ull substr(int l,int m){return get(l,l+m);}
    };
    set<ull>st;          // not used by this solution
    char s[16000005];    // input text
    bool a[maxn*2];      // a[v] is true once a window with base-nc value v has been seen
    int ha[256];         // maps a byte value to its digit (order of first appearance), -1 if unseen
    // Counts the distinct substrings of length n in the input text.
    // Each distinct character is assigned a digit in order of first appearance,
    // every window is read as a base-nc number, and that number indexes the
    // "seen" table directly.
    // NOTE(review): assumes every window value fits inside a[] (i.e. nc^n is
    // below 2*maxn) — confirm against the problem limits.
    int main()
    {
        int n,nc;
        cin>>n>>nc>>s;
        int cnt=0;
        memset(ha,-1,sizeof(ha));
        // a[] has static storage and is already zero-initialised, so no memset is needed.
        int len=strlen(s);
        for(int i=0;i<len;i++){
            // Index with unsigned char: a negative plain char would read and
            // write outside ha[].
            const unsigned char ch=static_cast<unsigned char>(s[i]);
            if(ha[ch]==-1)ha[ch]=cnt++;
        }
        int res=0;
        for(int i=0;i+n<=len;i++){
            int sum=0;
            for(int j=i;j<i+n;j++){
                sum*=nc;
                sum+=ha[static_cast<unsigned char>(s[j])];
            }
            if(!a[sum])res++,a[sum]=true;
        }
        cout<<res<<endl;
        return 0;
    }
    

    C.POJ - 2774 Long Long Message

    两个字符串的最长公共子串长度

    题意

    求两个字符串的最长公共子串的长度

    题解

    二分答案长度mid:把字符串A中所有长度为mid的子串的哈希值放入数组并排序,再对字符串B中每个长度为mid的子串的哈希值,在该数组中二分查找是否存在

    复杂度为O(logn×N×logN)

    也可以直接用后缀数组的height

    #include    <cstring>
    #include   <iostream>
    #include  <algorithm>
    #include     <string>
    #include     <vector>
    #include        <set>
    using namespace std;
    typedef long long ll;
    typedef unsigned long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e6+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // Base of the polynomial hash; all arithmetic wraps modulo 2^64.
    const ull seed=19260817;
    // Prefix polynomial hash of a string.
    //   p[i] = seed^i
    //   h[i] = hash of the first i characters (h[0] = 0)
    struct Hash{
        vector<ull>p,h;
        Hash(){}
        // Fills both tables for s in a single pass.
        Hash(const string& s){
            const int len=s.length();
            p.assign(len+1,0);
            h.assign(len+1,0);
            p[0]=1;
            for(int k=0;k<len;k++){
                p[k+1]=p[k]*seed;
                h[k+1]=h[k]*seed+s[k];
            }
        }
        // Hash of the half-open character range [l,r) (0-based).
        ull get(int l,int r){
            const ull shiftedPrefix=h[l]*p[r-l];
            return h[r]-shiftedPrefix;
        }
        // Hash of the m characters starting at 0-based offset l.
        ull substr(int l,int m){return get(l,l+m);}
    }A,B;
    int n,m;
    bool ok(int mid){
        vector<ull>ve;
        for(int i=0;i<=n-mid;i++){
            ve.push_back(A.substr(i,mid));
        }
        sort(ve.begin(),ve.end());
        for(int i=0;i<=m-mid;i++){
            if(binary_search(ve.begin(),ve.end(),B.substr(i,mid))){
                return true;
            }
        }
        return false;
    }
    // Reads two strings and prints the largest length for which ok() succeeds,
    // found by binary search over the length of the shorter string.
    int main()
    {
        std::ios::sync_with_stdio(false);
        std::cin.tie(0);
        std::cout.tie(0);
        string a,b;
        cin>>a>>b;
        n=a.length(),m=b.length();
        // Keep the shorter string in a / A so the search range is [0, n].
        if(n>m){swap(a,b);swap(n,m);}
        A=Hash(a);B=Hash(b);
        int lo=0,hi=n;
        int best=0;
        while(lo<=hi){
            const int mid=(lo+hi)>>1;
            if(ok(mid)){
                best=mid;
                lo=mid+1;
            }
            else{
                hi=mid-1;
            }
        }
        cout<<best<<endl;
        return 0;
    }
    

    D.URAL - 1989 Subpalindromes

    线段树/树状数组和哈希应用 判断回文

    题意

    给定一个字符串(长度<=100000),有两个操作。 1:改变某个字符。 2:判断某个子串是否构成回文串。

    题解

    把字符串的正向和反向哈希值分别插入线段树(或树状数组)中,然后单点修改,区间查值, 如果正向和反向的值一样,那就是回文了

    //线段树
    #include<bits/stdc++.h>
    using namespace std;
    typedef long long ll;
    typedef unsigned long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e5+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    #define lson (x<<1)
    #define rson ((x<<1)|1)
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // bit[i] holds 19260817^i (wrapping modulo 2^64); filled in main before build().
    ull bit[maxn];
    // The text being queried and modified.
    string s;
    // Segment-tree node covering the 1-based inclusive position range [l,r].
    struct node{
        int l,r;
        // sum1: sum over positions i in [l,r] of bit[i-1] * value(i)  (forward weights)
        // sum2: sum over positions i in [l,r] of bit[n-i] * value(i)  (reversed weights)
        // where value(i) is the letter at position i minus 'a'.
        ull sum1,sum2;
    }my[maxn<<2];
    // Length of s.
    int n;
    void pushup(int x){
        my[x].sum1=my[lson].sum1+my[rson].sum1;
        my[x].sum2=my[lson].sum2+my[rson].sum2;
    }
    void build(int x,int l,int r){
        my[x].l=l;my[x].r=r;
        if(my[x].l==my[x].r){
            my[x].sum1=bit[l-1]*(s[l-1]-'a');
            my[x].sum2=bit[n-l]*(s[l-1]-'a');
            return;
        }
        int mid=(l+r)>>1;
        build(lson,l,mid);
        build(rson,mid+1,r);
        pushup(x);
    }
    // Accumulators filled by query(): forward-weighted and reverse-weighted sums.
    ull one,two;
    // Sets the letter value at 1-based position pos to val and refreshes the
    // sums on the path back up to node x.
    void update(int x,int pos,int val){
        if(my[x].l!=my[x].r){
            const int mid=(my[x].l+my[x].r)>>1;
            if(pos>mid){
                update(rson,pos,val);
            }
            else{
                update(lson,pos,val);
            }
            pushup(x);
            return;
        }
        my[x].sum1=bit[pos-1]*val;
        my[x].sum2=bit[n-pos]*val;
    }
    void query(int x,int l,int r){
        if(my[x].l>=l&&my[x].r<=r){
            one+=my[x].sum1;
            two+=my[x].sum2;
            return;
        }
        int mid=(my[x].l+my[x].r)>>1;
        if(l<=mid)query(lson,l,r);
        if(r>mid)query(rson,l,r);
    }
    int main()
    {
        std::ios::sync_with_stdio(false);
        std::cin.tie(0);
        std::cout.tie(0);
        cin>>s;n=s.length();int t;
        cin>>t;
        bit[0]=1;
        for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
        build(1,1,n);
        while(t--){
            char s[50];
            cin>>s;
            if(s[0]=='p'){
                int x,y;
                cin>>x>>y;
                one=0;two=0;
                query(1,x,y);
                if((x-1)>(n-y))two*=bit[(x-1)-(n-y)];
                else one*=bit[(n-y)-(x-1)];
                if(one==two)cout<<"Yes"<<endl;
                else cout<<"No"<<endl;
            }
            else{
                int x;char ch;
                cin>>x>>ch;
                update(1,x,ch-'a');
            }
        }
        return 0;
    }
    
    //树状数组
    #include<bits/stdc++.h>
    using namespace std;
    typedef long long ll;
    typedef unsigned long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e5+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // bit[i] holds 19260817^i (wrapping modulo 2^64); filled at the start of main.
    // c[.][0] and c[.][1] are two Fenwick trees: flag 0 stores the weighted
    // letters of the string in forward order, flag 1 in reversed order.
    ull bit[maxn],c[maxn][2];
    // n: number of commands left for the current string; len: its length.
    int n,len;
    // Isolates the lowest set bit of x (0 when x is 0).
    inline int lowbit(int x){
        const int negated=-x;
        return x&negated;
    }
    // Fenwick point update: adds val at 1-based index x in the tree chosen by flag.
    void update(int x,ull val,int flag){
        for(int idx=x;idx<maxn;idx+=lowbit(idx)){
            c[idx][flag]+=val;
        }
    }
    // Fenwick prefix query: total of indices 1..x in the tree chosen by flag.
    ull sum(int x,int flag){
        ull total=0;
        for(int idx=x;idx;idx-=lowbit(idx)){
            total+=c[idx][flag];
        }
        return total;
    }
    string s;      // current command word
    string str;    // current text
    // For each text on input: loads its forward and reversed weighted letters
    // into the two Fenwick trees, then processes n commands. A command whose
    // first letter is 'p' is a palindrome query on [l,r]; anything else replaces
    // the letter at position w.
    int main()
    {
        std::ios::sync_with_stdio(false);
        std::cin.tie(0);
        std::cout.tie(0);
        bit[0]=1;
        for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
        while(cin>>str){
            len=str.length();
            memset(c,0,sizeof(c));
            for(int i=0;i<len;i++){
                update(i+1,(str[i]-'a'+1)*bit[i],0);
                update(i+1,(str[len-i-1]-'a'+1)*bit[i],1);
            }
            cin>>n;
            int l,r;
            while(n--){
                cin>>s;
                if(s[0]=='p'){
                    cin>>l>>r;
                    // Scale both range sums up to the same top exponent before comparing.
                    ull a=(sum(r,0)-sum(l-1,0))*bit[len-r];
                    ull b=(sum(len-l+1,1)-sum(len-r,1))*bit[l-1];
                    // '\n' instead of endl: no forced flush on every query.
                    if(a==b)cout<<"Yes\n";
                    else cout<<"No\n";
                }
                else{
                    int w;
                    char ch;
                    cin>>w>>ch;
                    // Apply the letter difference at the forward position and at
                    // its mirrored position in the reversed tree.
                    update(w,(ch-str[w-1])*bit[w-1],0);
                    update(len-w+1,(ch-str[w-1])*bit[len-w],1);
                    str[w-1]=ch;
                }
            }
        }
        return 0;
    }
    

    E.CodeForces - 580E Kefa and Watch

    线段树+哈希

    题意

    给你一个长度为n的字符串s,有两种操作:

    1 L R C : 把s[l,r]全部变为c;

    2 L R d : 询问s[l,r]是否是周期为d的重复串。

    题解

    n最大为1e5,且m+k最大也为1e5,这就要求操作1和操作2都要采用logn的算法,所以用线段树.

    对于更新操作,使用区间更新就可解决。

    主要是如何在logn的时间内完成询问操作.

    我们采用线段树维护hash值的方法.

    结合于类似KMP的性质,我们发现,字符串[l,r]有长度为w的循环节,只需要使得[l,r-w]=[l+w,r]即可。证明过程看这里

    这题的hash不同于普通的字符串hash,因为涉及到动态修改,所以需要预先处理出所有的base,在修改的时候直接用.

    #include<bits/stdc++.h>
    using namespace std;
    typedef long long ll;
    typedef long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e5+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // Hash base.
    ull seed=19260817;
    // s[i]  = seed^i mod `mod`
    // fs[i] = (s[0] + s[1] + ... + s[i]) mod `mod`
    ull s[maxn];
    ull fs[maxn];
    // Raw input digits.
    char ss[maxn];
    // Fills the power table s[] and its running-sum table fs[] in one pass.
    void init(){
        s[0]=fs[0]=1;
        for(int i=1;i<maxn;i++){
            s[i]=(s[i-1]*seed)%mod;
            fs[i]=(fs[i-1]+s[i])%mod;
        }
    }
    // Segment-tree node covering the 1-based inclusive position range [l,r].
    struct node{
        int l,r;
        // Nonzero while a range assignment still has to be pushed to the children.
        int lazy;
        // Value written by the most recent range assignment that covered this node.
        int ok;
        // Polynomial hash (mod `mod`) of the values in [l,r], leftmost value most significant.
        ull num;
    }my[maxn<<2];
    // Recomputes node x's hash from its children: the left hash is shifted by
    // the length of the right child's range, then the right hash is added.
    // (A commented-out debug printf that had been split across two lines, and
    // therefore no longer compiled, was removed.)
    void pushup(int x){
        int mid=(my[x].l+my[x].r)>>1;
        my[x].num=(my[x<<1].num*s[my[x].r-mid]+my[(x<<1|1)].num)%mod;
    }
    void pushdown(int x){
        if(my[x].lazy){
            int mid=(my[x].l+my[x].r)>>1;
            my[x<<1].lazy=my[(x<<1)|1].lazy=my[x].lazy;
            my[x<<1].ok=my[x<<1|1].ok=my[x].ok;
            my[x<<1].num=(fs[mid-my[x].l]*my[x].ok)%mod;
            my[(x<<1)|1].num=(fs[my[x].r-mid-1]*my[x].ok)%mod;
            my[x].lazy=0;
        }
    }
    // Builds the subtree rooted at node x over positions [l,r] (1-based) of the
    // digit string ss. A leaf stores its digit value as the hash.
    // (A commented-out debug printf that had been split across two lines, and
    // therefore no longer compiled, was removed.)
    void build(int x,int l,int r){
        my[x].l=l;my[x].r=r;my[x].lazy=0;
        if(my[x].l==my[x].r){
            my[x].num=ss[l-1]-'0';
            return;
        }
        int mid=(l+r)>>1;
        build(x<<1,l,mid);
        build((x<<1)|1,mid+1,r);
        pushup(x);
    }
    void update(int x,int l,int r,int k){
        if(my[x].l>=l&&my[x].r<=r){
            my[x].num=(fs[my[x].r-my[x].l]*k)%mod;
            my[x].ok=k;
            my[x].lazy=1;
            return;
        }
        pushdown(x);
        int mid=(my[x].l+my[x].r)>>1;
        if(l<=mid)update(x<<1,l,r,k);
        if(r>mid)update(x<<1|1,l,r,k);
        pushup(x);
    }
    // Returns the hash (mod `mod`) of the positions of [l,r] that fall inside
    // node x's range. Pending assignments are pushed down on the way.
    ull query(int x,int l,int r){
        if(my[x].l>=l&&my[x].r<=r)return my[x].num;
        pushdown(x);
        int mid=(my[x].l+my[x].r)>>1;
        if(l>mid)return query(x<<1|1,l,r);
        if(r<=mid)return query(x<<1,l,r);
        // The range straddles mid: shift the left part by the number of
        // positions taken from the right child, then append the right part.
        ull t1=query(x<<1,l,r);
        ull t2=query(x<<1|1,l,r);
        int k=min(r,my[x].r)-mid;
        return (t1*s[k]+t2)%mod;
        // The former trailing pushup(x) was unreachable (every path above
        // returns) and has been removed; a query does not modify x's hash.
    }
    void pri(int n){
        for(int i=1;i<=n*4;i++){
            printf("my[%d].num=%llu
    ",i,my[i].num);
        }
    }
    // Reads the digit string and q+t operations.
    //   op 1 l r d : assign the value d to every position in [l,r]
    //   otherwise  : print YES if [l,r] has period d, i.e. the hashes of
    //                [l,r-d] and [l+d,r] are equal; print NO otherwise
    int main()
    {
        init();
        int n,q,t;
        if(scanf("%d%d%d",&n,&q,&t)!=3)return 0;
        q+=t;
        if(scanf("%s",ss)!=1)return 0;
        int len=strlen(ss);
        build(1,1,len);
        for(int i=0;i<q;i++){
            int op,l,r,d;
            // Stop on truncated input instead of using uninitialised values.
            if(scanf("%d%d%d%d",&op,&l,&r,&d)!=4)break;
            if(op==1)update(1,l,r,d);
            else {
                // A period equal to the whole length always holds, and the two
                // ranges below would be empty.
                if(d==r-l+1){
                    printf("YES\n");
                    continue;
                }
                ull one=query(1,l,r-d);
                ull two=query(1,l+d,r);
                if(one==two)printf("YES\n");
                else printf("NO\n");
            }
        }

        return 0;
    }
    

    H.HDU - 1686 Oulipo

    哈希水题,求模式串出现次数

    #include<cstdio>
    #include<iostream>
    #include<cstring>
    #include<string>
    #include<set>
    #include<vector>
    using namespace std;
    typedef long long ll;
    typedef unsigned long long ull;
    #define pp pair<int,int>
    const ll mod=998244353;
    const int maxn=1e7+50;
    const ll inf=0x3f3f3f3f3f3f3f3fLL;
    // Greatest common divisor by the iterative Euclidean algorithm.
    // Returns a unchanged when b is 0, so gcd(0,0) is 0.
    int gcd(int a,int b){while(b){int t=a%b;a=b;b=t;}return a;}
    // Least common multiple of a and b.
    // Divides by the gcd before multiplying so the intermediate value never
    // exceeds the result (the previous a*b form overflowed int for large
    // inputs), and returns 0 when both arguments are 0 instead of dividing
    // by zero.
    int lcm(int a,int b){
        int g=gcd(a,b);
        if(g==0)return 0;
        return a/g*b;
    }
    // Candidate bases and moduli for the polynomial hash; a Hash selects one of
    // each by index.
    const ull Seed_Pool[]={146527,19260817};
    const ull Mod_Pool[]={1000000009,998244353};
    // Prefix polynomial hash of a string, computed modulo MOD with base SEED.
    //   p[i] = SEED^i mod MOD
    //   h[i] = hash of the first i characters (h[0] = 0)
    struct Hash{
        ull SEED,MOD;
        vector<ull>p,h;
        // Default construction picks the first seed/modulus so SEED and MOD are
        // never left indeterminate; the tables stay empty.
        Hash():SEED(Seed_Pool[0]),MOD(Mod_Pool[0]){}
        // Builds both tables for s using Seed_Pool[seed_index] and
        // Mod_Pool[mod_index]. The indices are not range-checked.
        Hash(const string& s,const int& seed_index,const int& mod_index){
            SEED=Seed_Pool[seed_index];
            MOD=Mod_Pool[mod_index];
            int n=s.length();
            p.resize(n+1),h.resize(n+1);
            p[0]=1;
            for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
            // Read each byte as unsigned char. A negative (signed) char would be
            // converted to a huge ull, wrap around 2^64 before the "% MOD", and
            // break the identity that get() relies on.
            for(int i=1;i<=n;i++){
                const ull byte=static_cast<unsigned char>(s[i-1]);
                h[i]=(h[i-1]*SEED%MOD+byte)%MOD;
            }
        }
        // Hash of the half-open character range [l,r) (0-based).
        // MOD is added before the final reduction to keep the difference non-negative.
        ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
        // Hash of the m characters starting at 0-based offset l.
        ull substr(int l,int m){return get(l,l+m);}
    };
    int main()
    {
        int t;
        ios::sync_with_stdio(false);
        cin>>t;
        while(t--){
            string s;
            cin>>s;
            int n=s.length();
            Hash aa=Hash(s,0,0);
            ull a=aa.substr(0,n);
            cin>>s;
            int nn=s.length();
            aa=Hash(s,0,0);
            //cout<<"aa="<<a<<endl;
            int sum=0;
            for(int i=0;i+n<=nn;i++){
                if(aa.substr(i,n)==a){
                    //cout<<aa.substr(i,n)<<endl;
                    sum++;
                }
            }
            cout<<sum<<endl;
        }
        return 0;
    }
    
  • 相关阅读:
    关于【最小生成树】
    题目1096:日期差值 (2009年上海交通大学计算机研究生机试真题)
    题目1432:叠筐 题目1126:打印极值点下标
    利用栈解决一些基本问题
    Redis与Python交互
    MongoDB与python 交互
    MongoDB安装
    mongodb基本操作
    MySQL与Python交互
    C++11 bind和function用法
  • 原文地址:https://www.cnblogs.com/luowentao/p/10332316.html
Copyright © 2020-2023  润新知