layout: post
title: 字符串哈希专题
author: "luowentaoaa"
catalog: true
tags:
mathjax: true
- 字符串
A.POJ - 1200 A - Crazy Search
摘要 哈希进制转换
题意
给定一个字符串,其中不同的字符不超过NC种。问它的所有长度为N的子串中,总共有多少个互不相同的子串
思路
以nc作为进制,把一个子串化为这个进制下的数,再用哈希判断
#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
struct Hash{
ull SEED,MOD;
vector<ull>p,h;
Hash(){}
Hash(const char* s,const int& seed_index,const int& mod_index){
SEED=Seed_Pool[seed_index];
MOD=Mod_Pool[mod_index];
int n=strlen(s);
p.resize(n+1),h.resize(n+1);
p[0]=1;
for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+s[i-1])%MOD;
}
ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
ull substr(int l,int m){return get(l,l+m);}
};
// Not referenced by main() below.
set<ull>st;
// Buffer that receives the input text.
char s[16000005];
// a[v] becomes true once a substring whose base-nc value is v has been counted.
bool a[maxn*2];
// ha[c] is the digit assigned to character code c, or -1 while unassigned.
int ha[256];
// Reads n, nc and a text, then prints how many distinct substrings of
// length n the text contains. Every distinct character gets a digit in
// order of first appearance; each window is read as a base-nc number and
// marked in the lookup table a[].
int main()
{
    int n,nc;
    cin>>n>>nc;
    // Cap the extraction at the buffer size: before C++20 `cin >> char*`
    // writes as many characters as the token has.
    cin.width(sizeof(s));
    cin>>s;
    int cnt=0;
    memset(ha,-1,sizeof(ha));
    memset(a,false,sizeof(a));
    int len=strlen(s);
    for(int i=0;i<len;i++){
        // Index through unsigned char: a negative plain char would address
        // memory in front of ha[].
        const unsigned char c=static_cast<unsigned char>(s[i]);
        if(ha[c]==-1)ha[c]=cnt++;
    }
    int res=0;
    for(int i=0;i+n<=len;i++){
        // NOTE(review): assumes nc^n stays below the size of a[]; the
        // problem statement is expected to guarantee this - confirm.
        int sum=0;
        for(int j=i;j<i+n;j++){
            sum*=nc;
            sum+=ha[static_cast<unsigned char>(s[j])];
        }
        if(!a[sum])res++,a[sum]=true;
    }
    cout<<res<<endl;
    return 0;
}
C.POJ - 2774 Long Long Message
两个字符串的最长公共子串长度
题意
求两个字符串的最长公共子串的长度
题解
二分答案长度mid:把字符串A中所有长度为mid的子串的哈希值存入数组并排序,再对字符串B中每个长度为mid的子串,在该数组中二分查找其哈希值;若能找到,说明存在长度为mid的公共子串
复杂度为O(logn×N×logN)
也可以直接用后缀数组的height
#include <cstring>
#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <set>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e6+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
const ull seed=19260817;
// Prefix polynomial hash with base `seed`, computed in unsigned 64-bit
// arithmetic so every operation wraps modulo 2^64.
//   p[i] = seed^i
//   h[i] = hash of the first i characters (h[0] == 0)
// A and B are the two global instances filled in by main().
struct Hash{
    vector<ull>p,h;
    Hash(){}
    Hash(const string& s){
        const int len=s.length();
        p.resize(len+1);
        h.resize(len+1);
        p[0]=1;
        for(int k=0;k<len;k++){
            p[k+1]=p[k]*seed;
            h[k+1]=h[k]*seed+s[k];
        }
    }
    // Hash of the characters at 0-based positions [l, r).
    ull get(int l,int r){
        const ull shiftedPrefix=h[l]*p[r-l];
        return h[r]-shiftedPrefix;
    }
    // Hash of the m characters starting at 0-based position l.
    ull substr(int l,int m){
        return get(l,l+m);
    }
}A,B;
int n,m;
bool ok(int mid){
vector<ull>ve;
for(int i=0;i<=n-mid;i++){
ve.push_back(A.substr(i,mid));
}
sort(ve.begin(),ve.end());
for(int i=0;i<=m-mid;i++){
if(binary_search(ve.begin(),ve.end(),B.substr(i,mid))){
return true;
}
}
return false;
}
// Reads two strings and prints the largest length for which ok() reports a
// shared substring hash, found by binary search on the length.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);

    string a,b;
    cin>>a>>b;
    n=a.length();
    m=b.length();
    // Keep the shorter string in a / n; the answer cannot exceed its length.
    if(n>m){
        swap(a,b);
        swap(n,m);
    }
    A=Hash(a);
    B=Hash(b);

    int best=0;
    int lo=0;
    int hi=n;
    while(lo<=hi){
        const int mid=(lo+hi)>>1;
        if(!ok(mid)){
            hi=mid-1;
            continue;
        }
        best=mid;
        lo=mid+1;
    }
    cout<<best<<endl;
    return 0;
}
D.URAL - 1989 Subpalindromes
线段树/树状数组和哈希应用 判断回文
题意
给定一个字符串(长度<=100000),有两个操作。 1:改变某个字符。 2:判断某个子串是否构成回文串。
题解
把字符串按正向、反向两种权值插入线段树或树状数组中,然后单点修改,区间查值, 如果正向和反向的哈希值一样,那就是回文了
//线段树
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
#define lson (x<<1)
#define rson ((x<<1)|1)
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// bit[i] holds 19260817^i with unsigned 64-bit wraparound; filled in by main().
ull bit[maxn];
// The input string; build() reads the initial characters from it.
string s;
// One segment-tree node; my[] stores the whole tree with the root at index 1.
struct node{
    int l;      // first position covered by the node (1-based)
    int r;      // last position covered by the node (1-based)
    ull sum1;   // sum of bit[i-1]*value(i) over the covered positions
    ull sum2;   // sum of bit[n-i]*value(i) over the covered positions
}my[maxn<<2];
int n;
void pushup(int x){
my[x].sum1=my[lson].sum1+my[rson].sum1;
my[x].sum2=my[lson].sum2+my[rson].sum2;
}
void build(int x,int l,int r){
my[x].l=l;my[x].r=r;
if(my[x].l==my[x].r){
my[x].sum1=bit[l-1]*(s[l-1]-'a');
my[x].sum2=bit[n-l]*(s[l-1]-'a');
return;
}
int mid=(l+r)>>1;
build(lson,l,mid);
build(rson,mid+1,r);
pushup(x);
}
ull one,two;
void update(int x,int pos,int val){
if(my[x].l==my[x].r){
my[x].sum1=bit[pos-1]*val;
my[x].sum2=bit[n-pos]*val;
return;
}
int mid=(my[x].l+my[x].r)>>1;
if(pos<=mid)
update(lson,pos,val);
else
update(rson,pos,val);
pushup(x);
}
void query(int x,int l,int r){
if(my[x].l>=l&&my[x].r<=r){
one+=my[x].sum1;
two+=my[x].sum2;
return;
}
int mid=(my[x].l+my[x].r)>>1;
if(l<=mid)query(lson,l,r);
if(r>mid)query(rson,l,r);
}
int main()
{
std::ios::sync_with_stdio(false);
std::cin.tie(0);
std::cout.tie(0);
cin>>s;n=s.length();int t;
cin>>t;
bit[0]=1;
for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
build(1,1,n);
while(t--){
char s[50];
cin>>s;
if(s[0]=='p'){
int x,y;
cin>>x>>y;
one=0;two=0;
query(1,x,y);
if((x-1)>(n-y))two*=bit[(x-1)-(n-y)];
else one*=bit[(n-y)-(x-1)];
if(one==two)cout<<"Yes"<<endl;
else cout<<"No"<<endl;
}
else{
int x;char ch;
cin>>x>>ch;
update(1,x,ch-'a');
}
}
return 0;
}
//树状数组
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// bit[i] holds 19260817^i with unsigned 64-bit wraparound (filled in by main()).
// c[.][0] and c[.][1] are two Fenwick trees: main() feeds the first with the
// string and the second with the reversed string.
ull bit[maxn],c[maxn][2];
// n is the number of operations still to process; len is the string length.
int n,len;
// Isolates the lowest set bit of x (0 when x is 0).
inline int lowbit(int x){
    const int negated=-x;
    return negated&x;
}
// Fenwick point update: adds val at 1-based position x of tree `flag`.
// Positions outside [1, maxn) are ignored: x == 0 used to loop forever
// because lowbit(0) == 0, and a negative x wrote in front of the array.
void update(int x,ull val,int flag){
    for(;x>0&&x<maxn;x+=lowbit(x)){
        c[x][flag]+=val;
    }
}
// Fenwick prefix query: total of positions 1..x in tree `flag`
// (unsigned 64-bit, wrapping on overflow).
ull sum(int x,int flag){
    ull total=0;
    for(int i=x;i!=0;i-=lowbit(i)){
        total+=c[i][flag];
    }
    return total;
}
string s;
string str;
int main()
{
std::ios::sync_with_stdio(false);
std::cin.tie(0);
std::cout.tie(0);
bit[0]=1;
for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
while(cin>>str){
len=str.length();
memset(c,0,sizeof(c));
for(int i=0;i<len;i++){
update(i+1,(str[i]-'a'+1)*bit[i],0);
update(i+1,(str[len-i-1]-'a'+1)*bit[i],1);
}
cin>>n;
int l,r;
while(n--){
cin>>s;
if(s[0]=='p'){
cin>>l>>r;
ull a=(sum(r,0)-sum(l-1,0))*bit[len-r];
ull b=(sum(len-l+1,1)-sum(len-r,1))*bit[l-1];
if(a==b)cout<<"Yes"<<endl;
else cout<<"No"<<endl;
}
else{
int w;
char ch;
cin>>w>>ch;
update(w,(ch-str[w-1])*bit[w-1],0);
update(len-w+1,(ch-str[w-1])*bit[len-w],1);
str[w-1]=ch;
}
}
}
return 0;
}
E.CodeForces - 580E Kefa and Watch
线段树+哈希
题意
给你一个长度为n的字符串s,有两种操作:
1 L R C : 把s[l,r]全部变为c;
2 L R d : 询问s[l,r]是否是周期为d的重复串。
题解
n最大为1e5,且m+k最大也为1e5,这就要求操作1和操作2都要采用logn的算法,所以用线段树.
对于更新操作,使用区间更新就可解决。
主要是如何在logn的时间内完成询问操作.
我们采用线段树维护hash值的方法.
结合于类似KMP的性质,我们发现,字符串[l,r]有长度为w的循环节,只需要使得[l,r-w]=[l+w,r]即可。证明过程看这里
这题的hash不同于普通的字符串hash,因为涉及到动态修改,所以需要预先处理出所有的base,在修改的时候直接用.
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
// Base of the polynomial hash.
ull seed=19260817;
//ull seed=10;
// s[i] = seed^i % mod (filled in by init()).
ull s[maxn];
// fs[i] = (s[0] + ... + s[i]) % mod (filled in by init()).
ull fs[maxn];
// Buffer that receives the input string.
char ss[maxn];
// Precomputes the power table s[] and its running sum fs[], both modulo mod.
void init(){
    s[0]=1;
    fs[0]=1;
    for(int i=1;i<maxn;i++){
        s[i]=(s[i-1]*seed)%mod;
        fs[i]=(fs[i-1]+s[i])%mod;
    }
}
// One segment-tree node; my[] stores the whole tree with the root at index 1.
struct node{
    int l;      // first position covered by the node
    int r;      // last position covered by the node
    int lazy;   // non-zero while a range assignment still has to reach the children
    int ok;     // the value of that pending range assignment
    ull num;    // hash of the covered range, modulo mod
}my[maxn<<2];
// Recomputes the hash of node x from its children: the left hash is shifted
// by the length of the right child (my[x].r - mid) and the right hash added.
// The broken remains of a commented-out debug printf were removed; its
// second line was live, unterminated-string code.
void pushup(int x){
    int mid=(my[x].l+my[x].r)>>1;
    my[x].num=(my[x<<1].num*s[my[x].r-mid]+my[(x<<1|1)].num)%mod;
}
// Pushes a pending range assignment from node x down to both children:
// each child takes over the flag and the value, and its hash becomes the
// value times fs[length-1].
void pushdown(int x){
    if(!my[x].lazy){
        return;
    }
    const int leftChild=x<<1;
    const int rightChild=(x<<1)|1;
    const int mid=(my[x].l+my[x].r)>>1;

    my[rightChild].lazy=my[x].lazy;
    my[leftChild].lazy=my[rightChild].lazy;
    my[rightChild].ok=my[x].ok;
    my[leftChild].ok=my[rightChild].ok;

    my[leftChild].num=(fs[mid-my[x].l]*my[x].ok)%mod;
    my[rightChild].num=(fs[my[x].r-mid-1]*my[x].ok)%mod;
    my[x].lazy=0;
}
// Builds the subtree rooted at x for positions [l, r] from the digit string
// ss; a leaf at position l stores ss[l-1]-'0'.
// The broken remains of a commented-out debug printf were removed; its
// second line was live, unterminated-string code.
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;my[x].lazy=0;
    if(my[x].l==my[x].r){
        my[x].num=ss[l-1]-'0';
        return;
    }
    int mid=(l+r)>>1;
    build(x<<1,l,mid);
    build((x<<1)|1,mid+1,r);
    pushup(x);
}
void update(int x,int l,int r,int k){
if(my[x].l>=l&&my[x].r<=r){
my[x].num=(fs[my[x].r-my[x].l]*k)%mod;
my[x].ok=k;
my[x].lazy=1;
return;
}
pushdown(x);
int mid=(my[x].l+my[x].r)>>1;
if(l<=mid)update(x<<1,l,r,k);
if(r>mid)update(x<<1|1,l,r,k);
pushup(x);
}
// Returns the hash (modulo mod) of the positions in [l, r] below node x.
// When the range spans both children the left part is shifted by the
// number of queried positions in the right child before the two are joined.
// The pushup(x) that followed the final return could never run and was
// dropped.
ull query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r)return my[x].num;
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l>mid)return query(x<<1|1,l,r);
    if(r<=mid)return query(x<<1,l,r);
    ull t1=query(x<<1,l,r);
    ull t2=query(x<<1|1,l,r);
    // Number of queried positions that fall in the right child.
    int k=min(r,my[x].r)-mid;
    return (t1*s[k]+t2)%mod;
}
void pri(int n){
for(int i=1;i<=n*4;i++){
printf("my[%d].num=%llu
",i,my[i].num);
}
}
// Reads the digit string and q+t operations. Operation 1 assigns a digit to
// a range; any other operation asks whether [l, r] has period d, answered
// by comparing the hashes of [l, r-d] and [l+d, r].
// The "YES"/"NO" literals had lost their "\n" escapes and were split across
// lines, which left the program uncompilable; they are restored here.
int main()
{
    init();
    int n,q,t;
    scanf("%d%d%d",&n,&q,&t);
    q+=t;
    scanf("%s",ss);
    int len=strlen(ss);
    build(1,1,len);
    for(int i=0;i<q;i++){
        int op,l,r,d;
        scanf("%d%d%d%d",&op,&l,&r,&d);
        if(op==1)update(1,l,r,d);
        else {
            // A period equal to the whole length holds trivially, and the
            // two ranges compared below would be empty.
            if(d==r-l+1){
                printf("YES\n");
                continue;
            }
            ull one=query(1,l,r-d);
            ull two=query(1,l+d,r);
            if(one==two)printf("YES\n");
            else printf("NO\n");
        }
    }
    return 0;
}
H.HDU - 1686 Oulipo
哈希水题,求模式串出现次数
#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b == 0, so gcd(0, 0) == 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// Divides before multiplying so no intermediate value is larger than the
// result (a*b overflowed int for moderately large inputs), and returns 0
// instead of dividing by zero when both arguments are 0.
int lcm(int a,int b){
    const int g=gcd(a,b);
    if(g==0)return 0;
    return a/g*b;
}
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
struct Hash{
ull SEED,MOD;
vector<ull>p,h;
Hash(){}
Hash(const string& s,const int& seed_index,const int& mod_index){
SEED=Seed_Pool[seed_index];
MOD=Mod_Pool[mod_index];
int n=s.length();
p.resize(n+1),h.resize(n+1);
p[0]=1;
for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+s[i-1])%MOD;
}
ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
ull substr(int l,int m){return get(l,l+m);}
};
int main()
{
int t;
ios::sync_with_stdio(false);
cin>>t;
while(t--){
string s;
cin>>s;
int n=s.length();
Hash aa=Hash(s,0,0);
ull a=aa.substr(0,n);
cin>>s;
int nn=s.length();
aa=Hash(s,0,0);
//cout<<"aa="<<a<<endl;
int sum=0;
for(int i=0;i+n<=nn;i++){
if(aa.substr(i,n)==a){
//cout<<aa.substr(i,n)<<endl;
sum++;
}
}
cout<<sum<<endl;
}
return 0;
}