• Codeforces 528D Fuzzy Search(FFT)


    题目

    Source

    http://codeforces.com/problemset/problem/528/D

    Description

    Leonid works for a small and promising start-up that works on decoding the human genome. His duties include solving complex problems of finding certain patterns in long strings consisting of letters 'A', 'T', 'G' and 'C'.

    Let's consider the following scenario. There is a fragment of a human DNA chain, recorded as a string S. To analyze the fragment, you need to find all occurrences of string T in a string S. However, the matter is complicated by the fact that the original chain fragment could contain minor mutations, which, however, complicate the task of finding a fragment. Leonid proposed the following approach to solve this problem.

    Let's write down integer k ≥ 0 — the error threshold. We will say that string T occurs in string S on position i (1 ≤ i ≤ |S| - |T| + 1), if after putting string T along with this position, each character of string T corresponds to some character of the same value in string S at a distance of at most k. More formally, for any j (1 ≤ j ≤ |T|) there must exist a p (1 ≤ p ≤ |S|) such that |(i + j - 1) - p| ≤ k and S[p] = T[j].

    For example, corresponding to the given definition, string "ACAT" occurs in string "AGCAATTCAT" in positions 2, 3 and 6.

    Note that at k = 0 the given definition transforms to a simple definition of the occurrence of a string in a string.

    Help Leonid by calculating in how many positions the given string T occurs in the given string S with the given error threshold.

    Input

    The first line contains three integers |S|, |T|, k (1 ≤ |T| ≤ |S| ≤ 200 000, 0 ≤ k ≤ 200 000) — the lengths of strings S and T and the error threshold.

    The second line contains string S.

    The third line contains string T.

    Both strings consist only of uppercase letters 'A', 'T', 'G' and 'C'.

    Output

    Print a single number — the number of occurrences of T in S with the error threshold k by the given definition.

    Sample Input

    10 4 1
    AGCAATTCAT
    ACAT

    Sample Output

    3

    分析

    题目大概相当于说给一个主串和模式串,主串各个位置i的字符可以等价于[i-k,i+k]位置中的任意一个字符,问模式串在主串中能匹配几次。

    首先O(n)扫一遍主串就可以预处理出主串各个位置等价的字符集合,然后就是主串有多少个子串和模式串匹配的问题了。

    这其实是FFT的经典应用:对每一种字符,快速求出模式串对齐到主串每个位置时,模式串中该字符有多少个被匹配。具体做法是枚举四种字符,把模式串反转后构造0/1多项式,与主串对应的0/1多项式用FFT求乘积;四种字符的匹配数之和等于|T|的位置就是一次匹配。类似的题目可参考LA4671。

    代码

    #include<cstdio>
    #include<cstring>
    #include<cmath>
    #include<algorithm>
    using namespace std;
    // Large sentinel value; nothing in the code visible here refers to it.
    #define INF (1<<30)
    // Capacity of the twiddle tables and FFT work arrays. It has to be at least the
    // padded transform length chosen in Convolution(), i.e. the smallest power of two
    // >= 2*|S| (2^19 = 524288 for |S| = 200000).
    #define MAXN 555555
    // Pi, used when building the roots of unity.
    const double PI=acos(-1.0);
     
    // Minimal complex number with public real/imaginary parts and just the
    // arithmetic the FFT needs (+, -, *).
    struct Complex{
    	double real,imag;
    	// Builds the value _real + _imag*i.
    	Complex(double _real,double _imag):real(_real),imag(_imag){}
    	// Default-constructs zero, so a local Complex never holds indeterminate values.
    	Complex():real(0),imag(0){}
    	// Component-wise sum.
    	Complex operator+(const Complex &cp) const{
    		return Complex(real+cp.real,imag+cp.imag);
    	}
    	// Component-wise difference.
    	Complex operator-(const Complex &cp) const{
    		return Complex(real-cp.real,imag-cp.imag);
    	}
    	// Complex product: (a+bi)(c+di) = (ac-bd) + (ad+bc)i.
    	Complex operator*(const Complex &cp) const{
    		return Complex(real*cp.real-imag*cp.imag,real*cp.imag+cp.real*imag);
    	}
    	// Overwrites both parts; calling it with no arguments resets the value to zero.
    	void setValue(double _real=0,double _imag=0){
    		real=_real; imag=_imag;
    	}
    };
     
    // Current transform length: written by Convolution(), read by FFT() and main().
    int len;
    // Root-of-unity tables indexed by block size h: main() stores
    // (cos(2*pi/h), sin(2*pi/h)) in wn[h] and its complex conjugate in wn_anti[h].
    // FFT() reads wn[] when op==1 and wn_anti[] otherwise.
    Complex wn[MAXN],wn_anti[MAXN];
     
    // In-place iterative radix-2 FFT over y[0..len-1].
    //
    //   y  - array of at least `len` elements; overwritten with the transform.
    //   op - 1 selects the wn[] twiddles; any other value selects wn_anti[].
    //        When op == -1 every element is additionally divided by len, which
    //        completes the inverse transform.
    //
    // Relies on the global `len` (a power of two, as set by Convolution()) and on
    // wn[h] / wn_anti[h] being filled for every power of two h in [2, len].
    void FFT(Complex y[],int op){
    	// Bit-reversal reordering: j follows the bit-reversed counterpart of i.
    	for(int i=1,j=len>>1,k; i<len-1; ++i){
    		if(i<j) swap(y[i],y[j]);
    		k=len>>1;
    		while(j>=k){
    			j-=k;
    			k>>=1;
    		}
    		if(j<k) j+=k;
    	}
    	// Butterfly passes over blocks of size h = 2, 4, ..., len.
    	for(int h=2; h<=len; h<<=1){
    		const Complex Wn=(op==1?wn[h]:wn_anti[h]);
    		for(int i=0; i<len; i+=h){
    			// W steps through the powers of Wn inside the current block.
    			Complex W(1,0);
    			for(int j=i; j<i+(h>>1); ++j){
    				Complex u=y[j],t=W*y[j+(h>>1)];
    				y[j]=u+t;
    				y[j+(h>>1)]=u-t;
    				W=W*Wn;
    			}
    		}
    	}
    	if(op==-1){
    		// Normalise both components; scaling only the real part would leave
    		// the imaginary parts too large by a factor of len.
    		for(int i=0; i<len; ++i){
    			y[i].real/=len;
    			y[i].imag/=len;
    		}
    	}
    }
    // Multiplies the two polynomials held in A[0..n-1] and B[0..n-1].
    //
    // Sets the global `len` to the smallest power of two that is >= 2*n, zero-pads
    // both arrays from index n up to len, transforms them, multiplies point-wise
    // and transforms A back. The product coefficients are then read from the real
    // parts of A[0..len-1]; B is left holding its forward transform.
    // Both arrays must have room for `len` elements.
    void Convolution(Complex A[],Complex B[],int n){
    	len=1;
    	while(len<(n<<1)) len<<=1;

    	// Clear everything beyond the n meaningful coefficients.
    	for(int pad=n; pad<len; ++pad){
    		A[pad]=Complex(0,0);
    		B[pad]=Complex(0,0);
    	}

    	FFT(A,1);
    	FFT(B,1);
    	for(int idx=0; idx<len; ++idx){
    		const Complex product=A[idx]*B[idx];
    		A[idx]=product;
    	}
    	FFT(A,-1);
    }
    
    // Input buffers filled by scanf in main(): S is the text, T is the pattern.
    char S[222222],T[222222];
    // Number of occurrences of each letter class (indexed by get_idx) inside the
    // window that main() slides over S.
    int cnt[4];
    // Maps a nucleotide letter to its class index: 'A' -> 0, 'T' -> 1, 'C' -> 2,
    // 'G' -> 3. Every other character yields -1.
    int get_idx(char ch){
    	switch(ch){
    		case 'A': return 0;
    		case 'T': return 1;
    		case 'C': return 2;
    		case 'G': return 3;
    		default:  return -1;
    	}
    }
    
    // sta[i]: bitmask of the letter classes found in the window around S[i]
    //         (bit j is set when class j occurs there); filled in main().
    // ans[j]: sum over the four letters of the rounded convolution coefficient j.
    int sta[222222],ans[MAXN];
    // Work polynomials handed to Convolution(): main() builds A from sta[] and B
    // from the reversed pattern, one letter at a time.
    Complex A[MAXN],B[MAXN];
    
    // Reads |S|, |T|, k and the two strings from stdin and prints how many positions
    // of S contain an occurrence of T under the error threshold k.
    //
    // Steps:
    //   1. Fill the twiddle tables for every power-of-two block size.
    //   2. Slide the window [i-k, i+k] (clipped to S) over the text and record in
    //      sta[i] which letters occur inside it.
    //   3. For each letter, convolve the indicator "letter occurs near position p"
    //      with the reversed indicator "pattern holds this letter". Coefficient j
    //      then counts the pattern positions carrying that letter which match when
    //      the last character of T is aligned with S[j].
    //   4. Every coefficient whose total over the four letters equals m is an
    //      occurrence.
    //
    // Returns 1 without printing anything when the input is malformed (missing
    // fields, lengths that exceed the strings actually read, negative k, or a
    // letter other than A/T/C/G in the text).
    int main(){
    	// FFT() only indexes the tables at power-of-two block sizes h >= 2, so only
    	// those entries are computed; this also avoids evaluating 2*PI/0.
    	for(int h=2; h<MAXN; h<<=1){
    		wn[h].setValue(cos(2.0*PI/h),sin(2.0*PI/h));
    		wn_anti[h].setValue(wn[h].real,-wn[h].imag);
    	}
    	int n,m,k;
    	if(scanf("%d%d%d",&n,&m,&k)!=3) return 1;
    	// The field widths keep scanf inside the 222222-byte buffers.
    	if(scanf("%222221s%222221s",S,T)!=2) return 1;
    	if(n<1 || m<1 || k<0 || (size_t)n>strlen(S) || (size_t)m>strlen(T)) return 1;
    	// An unknown letter would make get_idx() return -1 and index cnt[-1] below.
    	for(int i=0; i<n; ++i){
    		if(get_idx(S[i])<0) return 1;
    	}

    	// Sliding window: before iteration i it covers S[l..r]; after the two
    	// adjustments it covers [max(0,i-k), min(n-1,i+k)].
    	int l=0,r=min(n,k)-1;
    	for(int i=l; i<=r; ++i) ++cnt[get_idx(S[i])];
    	for(int i=0; i<n; ++i){
    		if(i-l>k) --cnt[get_idx(S[l++])];
    		if(r+1<n) ++cnt[get_idx(S[++r])];

    		for(int j=0; j<4; ++j){
    			if(cnt[j]) sta[i]|=(1<<j);
    		}
    	}

    	for(int i=0; i<4; ++i){
    		// Wipe what the previous letter left behind (len is 0 on the first pass,
    		// when the static arrays are still all zero).
    		for(int j=0; j<len; ++j){
    			A[j].setValue();
    			B[j].setValue();
    		}
    		// The pattern is stored reversed so that the product lines up the
    		// matches for one alignment in a single coefficient.
    		for(int j=0; j<m; ++j){
    			if(get_idx(T[j])==i) B[m-j-1].setValue(1);
    		}
    		for(int j=0; j<n; ++j){
    			if((sta[j]>>i)&1) A[j].setValue(1);
    		}
    		Convolution(A,B,n);
    		// Round to the nearest integer to absorb floating-point error.
    		for(int j=0; j<len; ++j){
    			ans[j]+=(int)(A[j].real+0.5);
    		}
    	}

    	int res=0;
    	for(int i=0; i<len; ++i){
    		if(ans[i]==m) ++res;
    	}
    	printf("%d",res);
    	return 0;
    }
    
  • 相关阅读:
    javascript、CSS、XML动太生成树菜单
    设表格细钱
    JS操作JSON总结
    Meta标签详解
    js中eval详解
    搜虎网上线
    Fluent NHibernate demo 示例
    Oracle 索引概述
    程序员如何加薪——请看《谁赚走了你的薪水_让经济学帮你加薪].(英)蒂姆·哈福德.扫描版[ED2000.COM].pdf》
    expertoracledatabasearchitectureoracledatabaseprogramming9i10gand11gtechniquesandsoluti.pdf
  • 原文地址:https://www.cnblogs.com/WABoss/p/5847443.html
Copyright © 2020-2023  润新知