• poj_1204 Trie图


    题目大意

        给出一个由RxC个字符组成的puzzle,可以沿八个方向在其中查找字符串:从左向右,从右到左,从上到下,从下到上,从左上到右下,从右下到左上,从左下到右上,从右上到左下。
        给出M个字符串,找出他们在puzzle中的位置,返回该字符串在puzzle中的起点横纵坐标以及方向。

    字符串长度L <= 1000, R, C <= 1000, 字符串个数M <= 1000

    题目分析

        多模式串的字符串匹配问题,考虑使用Trie图。将M个待查的字符串作为模式串插入Trie图中,然后设置前缀指针,构造DFA。 
        查找的时候,在puzzle的四个边上每个点,沿8个方向分别确定最长的串作为母串,用母串在Trie图上进行行走,进行匹配。

    题目比较坑的是,special judge没能对有些正确的结果给AC,只能按照“正常”的顺序来查找。

    实现(c++)

    #define _CRT_SECURE_NO_WARNINGS
    #include<stdio.h>
    #include<string.h>
    #include<vector>
    #include<deque>
    using namespace std;
    // Upper bound (with slack) used for the grid side, the number of patterns and the pattern buffer.
    #define MAX_SIZE 1005
    // Fan-out of a trie node; child slots are indexed by `ch - 'A'`.
    #define LETTERS 26
    // Capacity of the static trie node pool.
    #define MAX_NODES 150000
    // Arguments and the whole expansion are parenthesised so the macro stays correct inside
    // larger expressions (e.g. `MIN(a, b) + 1`). Arguments may still be evaluated twice.
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    // The puzzle grid, stored 1-based by main(). Cells that are never written stay '\0'
    // after the memset in main(), and SearchStr() uses that '\0' as its stop marker.
    char gPizza[MAX_SIZE][MAX_SIZE];
    // Result record for one pattern: the start cell and the direction index it was found in.
    // Kept as a plain aggregate so the global array of these is zero-initialised.
    struct PosInfo{
    	int row;	// start row as stored by FindPatternFromEndPoint
    	int col;	// start column as stored by FindPatternFromEndPoint
    	int dir;	// direction index 0..7 (row into gMoveStep)

    	// Overwrites all three fields in one call.
    	void SetInfo(int r, int c, int d){
    		this->dir = d;
    		this->col = c;
    		this->row = r;
    	}
    };
    // One result slot per pattern, indexed by the pattern number (1..M) assigned in main().
    PosInfo gPosInfo[MAX_SIZE];
    
    
    
    // One node of the trie / Aho-Corasick automaton.
    struct Node{
    	Node* childs[LETTERS];	// child per letter, NULL when absent
    	bool danger_node;		// true if some pattern ends here or at a node reachable via prev links
    	Node* prev;				// failure ("prefix") link, filled in by BuildDfa
    	int pattern_index;		// number of the pattern that ends exactly here, 0 if none

    	// Starts as an empty, non-terminal node with no failure link.
    	Node() : danger_node(false), prev(NULL), pattern_index(0){
    		for (int slot = 0; slot < LETTERS; slot++){
    			childs[slot] = NULL;
    		}
    	}
    };
    // Static pool that every trie node is taken from (no dynamic allocation).
    // BuildDfa uses slot 0 as a helper node above the root and slot 1 as the real root.
    Node gNodes[MAX_NODES];
    // Index of the next free slot in gNodes; starts at 2 because slots 0 and 1 are reserved.
    int gNodeCount = 2;
    
    // Adds one pattern to the trie.
    //   root          - trie root to start from
    //   str           - NUL-terminated pattern; every character is mapped with `ch - 'A'`,
    //                   so it must consist of 'A'..'Z' only
    //   pattern_index - number stored on the terminal node (0 means "no pattern", so use >= 1)
    // New nodes are taken from the gNodes pool by bumping gNodeCount.
    // NOTE(review): there is no check against MAX_NODES here.
    void Insert(Node* root, char* str, int pattern_index){
    	Node* node = root;
    	// Walk the pattern up to its terminating NUL, creating missing children on the way.
    	for (char* p = str; *p != '\0'; p++){
    		int index = *p - 'A';
    		if (!node->childs[index]){
    			node->childs[index] = gNodes + gNodeCount++;
    		}
    		node = node->childs[index];
    	}
    	// Mark the last node as the end of this pattern.
    	node->danger_node = true;
    	node->pattern_index = pattern_index;
    }
    
    void BuildDfa(){
    	Node* root = gNodes + 1;
    	for (int i = 0; i < LETTERS; i++){
    		gNodes[0].childs[i] = root;
    	}
    	root->prev = gNodes;
    	gNodes[0].prev = NULL;
    
    	deque<Node*> Q;
    	Q.push_back(root);
    	while (!Q.empty()){
    		Node* node = Q.front();
    		Node* prev = node->prev, *p;
    		Q.pop_front();
    		for (int i = 0; i < LETTERS; i++){
    			if (node->childs[i]){
    				p = prev;
    				while (p && !p->childs[i]){
    					p = p->prev;
    				}
    				node->childs[i]->prev = p->childs[i];
    				//这个地方注意,不能写成 p->childs[i]->danger_node = node->childs[i]->danger_node
    				if (p->childs[i]->danger_node)				
    					node->childs[i]->danger_node = true;
    				Q.push_back(node->childs[i]);
    			}
    		}
    	}
    }
    
    // gPatterFind[i] becomes true once pattern number i has been located.
    bool gPatterFind[MAX_SIZE];
    // How many distinct patterns have been located so far; used to stop searching early.
    int gPatternFoundNum = 0;
    // Threshold compared against MaxLen() to skip border lines that are too short.
    // NOTE(review): it starts at 0, so a running minimum taken against it can never rise above 0.
    int gMinPatternLen = 0;
    // gPatternLen[i] is the length of pattern number i.
    int gPatternLen[MAX_SIZE];
    // {row delta, column delta} per direction. Index 0 moves one row up and the following
    // indices turn clockwise (up-right, right, down-right, down, down-left, left, up-left).
    // main() prints a direction as the letter 'A' + index.
    int gMoveStep[8][2] = { { -1, 0 }, { -1, 1 }, { 0, 1 }, { 1, 1 }, { 1, 0 }, { 1, -1 }, {0, -1 }, { -1, -1 } };
    
    // Records every not-yet-found pattern that ends at grid cell (r, c) when walking in `dir`.
    // Reaching a danger node is not enough on its own: nodes further along the failure chain
    // may be pattern ends too. Example: searching ABCD and CD in ABCDFF - arriving at D yields
    // ABCD, and only following the failure links also yields CD.
    //   node - automaton node reached after consuming cell (r, c); must not be NULL
    //   r, c - 1-based grid position of the last matched character
    //   dir  - direction index 0..7 the text was read in
    void FindPatternFromEndPoint(Node* node, int r, int c, int dir){
    	const int row_step = gMoveStep[dir][0];
    	const int col_step = gMoveStep[dir][1];
    	for (Node* cur = node; cur != NULL; cur = cur->prev){
    		const int id = cur->pattern_index;
    		// Skip nodes that end no pattern, and patterns already reported; keep following
    		// the chain either way because shorter patterns may still end here.
    		if (id == 0 || gPatterFind[id]){
    			continue;
    		}
    		gPatterFind[id] = true;
    		gPatternFoundNum++;

    		// Step back (length - 1) cells against the direction to reach the first character,
    		// and shift from the 1-based grid position to a 0-based coordinate.
    		const int back = gPatternLen[id] - 1;
    		const int beg_r = (r - 1) - back * row_step;
    		const int beg_c = (c - 1) - back * col_step;
    		gPosInfo[id].SetInfo(beg_r, beg_c, dir);
    	}
    }
    // Feeds the text that starts at grid cell (start_x, start_y) and runs in direction `dir`
    // through the automaton, reporting every pattern that ends along the way.
    //   start_x - 1-based row of the first cell
    //   start_y - 1-based column of the first cell
    //   dir     - direction index 0..7 (row into gMoveStep)
    // The walk stops at the first '\0' cell; it relies on the cells surrounding the filled
    // part of gPizza being zero. Grid characters are mapped with `ch - 'A'`, so they are
    // expected to be 'A'..'Z'.
    void SearchStr(int start_x, int start_y, int dir){
    	int r = start_x, c = start_y;
    	Node* node = gNodes + 1;	// start at the trie root
    	while (gPizza[r][c] != '\0'){
    		int index = gPizza[r][c] - 'A';
    		// Follow failure links until some node has a transition for this letter.
    		// gNodes[0] has every transition set by BuildDfa, so the chain always ends on one.
    		while (node && node->childs[index] == NULL){
    			node = node->prev;
    		}
    		node = node->childs[index];
    		if (node->danger_node){
    			FindPatternFromEndPoint(node, r, c, dir);
    		}
    		r += gMoveStep[dir][0];
    		c += gMoveStep[dir][1];
    	}
    }
    // Length estimate for the line that starts at border cell (r, c) and runs in direction `dir`
    // inside an R x C grid (1-based coordinates).
    // Vertical directions (0, 4) always yield R and horizontal ones (2, 6) always yield C,
    // whatever the start cell is. For a diagonal the value depends on which edge the start
    // cell lies on; a start cell on neither of the two edges handled for that diagonal
    // yields -1, as does an unknown direction.
    int MaxLen(int R, int C, int r, int c, int dir){
    	switch (dir){
    	case 0:
    	case 4:
    		return R;
    	case 2:
    	case 6:
    		return C;
    	case 1:		// up-right: handled from the left column or the bottom row
    		if (c == 1)
    			return r;
    		if (r == R)
    			return C - c + 1;
    		break;
    	case 5:		// down-left: handled from the top row or the right column
    		if (r == 1)
    			return c;
    		if (c == C)
    			return R - r + 1;
    		break;
    	case 3:		// down-right: handled from the top row or the left column
    		if (r == 1)
    			return C - c + 1;
    		if (c == 1)
    			return R - r + 1;
    		break;
    	case 7:		// up-left: handled from the bottom row or the right column
    		if (r == R)
    			return c;
    		if (c == C)
    			return r;
    		break;
    	default:
    		break;
    	}
    	return -1;
    }
    
    // Tries all 8 directions from border cell (r, c), skipping directions whose MaxLen()
    // estimate is below gMinPatternLen. Returns true as soon as every pattern has been
    // found, which tells the caller to stop.
    static bool SearchFromBorderCell(int R, int C, int r, int c, int total_word_to_find){
    	for (int dir = 0; dir < 8; dir++){
    		if (gPatternFoundNum == total_word_to_find){
    			return true;
    		}
    		if (MaxLen(R, C, r, c, dir) >= gMinPatternLen){
    			SearchStr(r, c, dir);
    		}
    	}
    	return false;
    }

    // Searches from every border cell of the R x C grid in all 8 directions.
    // Visiting order: left column, right column, top row, bottom row - each from index 1
    // upwards. The order decides which occurrence gets recorded for a pattern.
    //   total_word_to_find - number of patterns; the search ends early once all are found
    void SearchPuzzle(int R, int C, int total_word_to_find){
    	const int edge_cols[2] = { 1, C };
    	for (int side = 0; side < 2; side++){
    		for (int r = 1; r <= R; r++){
    			if (SearchFromBorderCell(R, C, r, edge_cols[side], total_word_to_find)){
    				return;
    			}
    		}
    	}
    	const int edge_rows[2] = { 1, R };
    	for (int side = 0; side < 2; side++){
    		for (int c = 1; c <= C; c++){
    			if (SearchFromBorderCell(R, C, edge_rows[side], c, total_word_to_find)){
    				return;
    			}
    		}
    	}
    }
    
    
    int main(){
    	int R, C, M;
    	scanf("%d %d %d", &R, &C, &M);	
    	memset(gPizza, 0, sizeof(gPizza));
    	memset(gPatterFind, false, sizeof(gPatterFind));
    	gNodeCount = 2;
    
    	for (int r = 1; r <= R; r++){
    		getchar();
    		for (int c = 1; c <= C; c++){
    			scanf("%c", &gPizza[r][c]);
    		}
    	}
    	getchar();
    	char str[MAX_SIZE];
    	Node* root = gNodes + 1;
    	for (int i = 1; i <= M; i++){
    		scanf("%s", str);
    		Insert(root, str, i);
    		gPatternLen[i] = strlen(str);
    		gMinPatternLen = MIN(gMinPatternLen, gPatternLen[i]);		
    	}	
    	
    	BuildDfa();
    
    	SearchPuzzle(R, C, M);
    	for (int i = 1; i <= M; i++){
    		printf("%d %d %c
    ", gPosInfo[i].row, gPosInfo[i].col, gPosInfo[i].dir + 'A');
    	}
    	return 0;
    }
    
  • 相关阅读:
    WPF 进度条
    WPF CPU使用率线性表
    Android annotation
    git 操作
    git 合并指定目录到master
    远程连接mysql报错,ERROR 1045 (28000): Access denied for user 'ODBC'@'localhost' (using password: NO)解决方案
    centos7 安装python2.7.14 并与原版本共存
    pycahrm 断点调试
    ImportError: No module named Crypto.Cipher 报错解决方法
    python操作 rabbitMQ
  • 原文地址:https://www.cnblogs.com/gtarcoder/p/4821317.html
Copyright © 2020-2023  润新知