字典树 - 润新知

字典树

    字典树：又称为Trie，是一种用于快速检索的多叉树结构。Trie把要查找的关键词看作一个字符序列，并根据构成关键词字符的先后顺序构造用于检索的树结构；一棵m度的Trie树或者为空，或者由m棵m度的Trie树构成。

在Trie树中查找一个关键字的时间和树中包含的结点数无关，而取决于组成关键字的字符数。

如果要查找的关键字可以分解成字符序列且不是很长，利用Trie树查找速度优于二叉查找树。

若关键字长度最大是5，则利用Trie树最多只需5次比较，就可以从26⁵＝11881376个可能的关键字中检索出指定的关键字；而利用二叉查找树至少要进行log₂26⁵≈23.5次比较。

字典树是一种用来查询某个单词（前缀）在所有单词中出现次数的数据结构。它的插入和查询复杂度都为O(len)，其中len为单词（前缀）的长度；但是它的空间复杂度非常高：如果字符集是26个字母，那么每个节点就有26个分支指针，是典型的以空间换时间的结构。

字典树基本模板：

#define MAX    26 /* alphabet size: one branch per letter 'a'..'z' */

/*
 * One trie node.
 *
 * nCount counts the inserted words whose path runs through this node, i.e.
 * the number of words that start with the prefix the node stands for.  For
 * the root (the empty prefix) that is the number of words inserted.
 */
typedef struct TrieNode
{
    int nCount;
    struct TrieNode *next[MAX]; /* child per letter, NULL when absent */
}TrieNode;

/* Static node pool: slots are handed out in order and never given back. */
TrieNode Memory[1000000];

int allocp = 0; /* index of the next unused slot in Memory */

/* Map a character to its branch index, or -1 if it is outside 'a'..'z'. */
static int TrieIndex(char ch)
{
    int k = ch - 'a';

    return (k >= 0 && k < MAX) ? k : -1;
}

/* Initialise *pRoot to the empty trie. */
void InitTrieRoot(TrieNode **pRoot)
{
    *pRoot = NULL;
}

/*
 * Take the next node from the pool, with nCount set to 1 and no children.
 *
 * Returns NULL once every slot of Memory has been handed out, instead of
 * writing past the end of the array.
 */
TrieNode *CreateTrieNode(void)
{
    int i;
    TrieNode *p;

    if (allocp < 0 || allocp >= (int)(sizeof Memory / sizeof Memory[0]))
        return NULL;

    p = &Memory[allocp++];
    p->nCount = 1;
    for (i = 0; i < MAX; i++)
        p->next[i] = NULL;
    return p;
}

/*
 * Insert the word s into the trie rooted at *pRoot, creating the root on the
 * first insertion.
 *
 * A word containing any character outside 'a'..'z' is rejected as a whole and
 * leaves the trie untouched, so no counter is updated for half a word and
 * next[] is never indexed out of range.  If the node pool runs out part-way,
 * insertion stops at that point: the prefixes already walked keep their
 * incremented counters, the longer ones are not recorded.
 */
void InsertTrie(TrieNode **pRoot , const char *s)
{
    const char *c;
    TrieNode *p;
    int k;

    if (pRoot == NULL || s == NULL)
        return;

    /* Validate first so that a bad character cannot leave a partial path. */
    for (c = s; *c; c++)
        if (TrieIndex(*c) < 0)
            return;

    p = *pRoot;
    if (p == NULL) {
        /* A fresh node starts at nCount == 1, which accounts for this word. */
        p = *pRoot = CreateTrieNode();
        if (p == NULL)
            return;
    } else {
        p->nCount++; /* one more word shares the empty prefix */
    }

    for (c = s; *c; c++) {
        k = TrieIndex(*c); /* pick the branch */
        if (p->next[k]) {
            p->next[k]->nCount++;
        } else {
            p->next[k] = CreateTrieNode();
            if (p->next[k] == NULL)
                return; /* pool exhausted */
        }
        p = p->next[k];
    }
}

/*
 * Return how many inserted words start with the prefix s.
 *
 * Returns 0 for an empty trie, for a prefix no word starts with, and for a
 * prefix containing a character outside 'a'..'z'.  The empty prefix yields
 * the number of words inserted.
 */
int SearchTrie(TrieNode **pRoot , const char *s)
{
    TrieNode *p;
    int k;

    if (pRoot == NULL || s == NULL || (p = *pRoot) == NULL)
        return 0;

    for (; *s; s++) {
        k = TrieIndex(*s);
        if (k < 0 || p->next[k] == NULL)
            return 0;
        p = p->next[k];
    }
    return p->nCount;
}

统计难题（这里都用数组分配结点，因为用malloc分配太慢了）：这题就是统计一组字符串中某前缀出现的次数（字典树的第一类应用），因此只要简单地套用模板就行了（在节点中设置一个成员变量nCount，用来记录该字符出现的次数）。

#include <stdio.h>

#define MAX    26 /* alphabet size: one branch per letter 'a'..'z' */

/*
 * One trie node.
 *
 * nCount counts the inserted words whose path runs through this node, i.e.
 * the number of words that start with the prefix the node stands for.  For
 * the root (the empty prefix) that is the number of words inserted.
 */
typedef struct TrieNode
{
    int nCount;
    struct TrieNode *next[MAX]; /* child per letter, NULL when absent */
}TrieNode;

/* Static node pool: slots are handed out in order and never given back. */
TrieNode Memory[1000000];

int allocp = 0; /* index of the next unused slot in Memory */

/* Map a character to its branch index, or -1 if it is outside 'a'..'z'. */
static int TrieIndex(char ch)
{
    int k = ch - 'a';

    return (k >= 0 && k < MAX) ? k : -1;
}

/* Initialise *pRoot to the empty trie. */
void InitTrieRoot(TrieNode **pRoot)
{
    *pRoot = NULL;
}

/*
 * Take the next node from the pool, with nCount set to 1 and no children.
 *
 * Returns NULL once every slot of Memory has been handed out, instead of
 * writing past the end of the array.
 */
TrieNode *CreateTrieNode(void)
{
    int i;
    TrieNode *p;

    if (allocp < 0 || allocp >= (int)(sizeof Memory / sizeof Memory[0]))
        return NULL;

    p = &Memory[allocp++];
    p->nCount = 1;
    for (i = 0; i < MAX; i++)
        p->next[i] = NULL;
    return p;
}

/*
 * Insert the word s into the trie rooted at *pRoot, creating the root on the
 * first insertion.
 *
 * A word containing any character outside 'a'..'z' is rejected as a whole and
 * leaves the trie untouched, so no counter is updated for half a word and
 * next[] is never indexed out of range.  If the node pool runs out part-way,
 * insertion stops at that point: the prefixes already walked keep their
 * incremented counters, the longer ones are not recorded.
 */
void InsertTrie(TrieNode **pRoot , const char *s)
{
    const char *c;
    TrieNode *p;
    int k;

    if (pRoot == NULL || s == NULL)
        return;

    /* Validate first so that a bad character cannot leave a partial path. */
    for (c = s; *c; c++)
        if (TrieIndex(*c) < 0)
            return;

    p = *pRoot;
    if (p == NULL) {
        /* A fresh node starts at nCount == 1, which accounts for this word. */
        p = *pRoot = CreateTrieNode();
        if (p == NULL)
            return;
    } else {
        p->nCount++; /* one more word shares the empty prefix */
    }

    for (c = s; *c; c++) {
        k = TrieIndex(*c); /* pick the branch */
        if (p->next[k]) {
            p->next[k]->nCount++;
        } else {
            p->next[k] = CreateTrieNode();
            if (p->next[k] == NULL)
                return; /* pool exhausted */
        }
        p = p->next[k];
    }
}

/*
 * Return how many inserted words start with the prefix s.
 *
 * Returns 0 for an empty trie, for a prefix no word starts with, and for a
 * prefix containing a character outside 'a'..'z'.  The empty prefix yields
 * the number of words inserted.
 */
int SearchTrie(TrieNode **pRoot , const char *s)
{
    TrieNode *p;
    int k;

    if (pRoot == NULL || s == NULL || (p = *pRoot) == NULL)
        return 0;

    for (; *s; s++) {
        k = TrieIndex(*s);
        if (k < 0 || p->next[k] == NULL)
            return 0;
        p = p->next[k];
    }
    return p->nCount;
}



/*
 * Read one input line into buf (at most size-1 characters, always
 * NUL-terminated), dropping the line terminator.  Characters that do not fit
 * are read and discarded, so a long line can never overrun buf.
 *
 * Returns 0 only when end of input is reached before any character was read,
 * 1 otherwise.
 */
static int ReadLine(char *buf, int size)
{
    int c;
    int n = 0;

    while ((c = getchar()) != EOF && c != '\n') {
        if (n < size - 1)
            buf[n++] = (char)c;
    }
    if (n > 0 && buf[n - 1] == '\r')
        n--; /* tolerate CRLF line endings */
    buf[n] = '\0';
    return c != EOF || n > 0;
}

/*
 * Read dictionary words, one per line, up to the first blank line and insert
 * them into the trie; then answer every remaining line as a prefix query by
 * printing the number of dictionary words that start with it.
 */
int main(void)
{
    /* gets() cannot be bounded and was removed in C11, so lines are read
       through ReadLine() into a buffer with room to spare. */
    char s[64];
    TrieNode *Root = NULL;

    InitTrieRoot(&Root);

    while (ReadLine(s, (int)sizeof s) && s[0])
    {
        InsertTrie(&Root , s);
    }

    while (ReadLine(s, (int)sizeof s))
    {
        printf("%d\n",SearchTrie(&Root , s));
    }

    return 0;
}

另外，下面是一个字典树的变种：树的每个节点不再存储字符，而是存储整个单词，并利用strcmp的比较结果形成一棵排序二叉树。利用这个结构，可以统计词频：

#include <stdio.h>

#include <ctype.h>

#include <string.h>

#include <stdlib.h>

#define MAXWORD 100   /* size of the word buffer that main passes to getword */

/* NOTE(review): declared here but neither defined nor called in the visible
   code. */
int open(char *Vocabulary, int mode);

/* Node of the binary tree that holds one distinct word and its tally. */
struct tnode {
    char *word;            /* points to the text of the word */
    int count;             /* number of occurrences */
    struct tnode *left;    /* left child */
    struct tnode *right;   /* right child */
};

struct tnode *addtree(struct tnode *p, char *w);
void treeprint(struct tnode *p);
int getword(char *word, int lim);

/*
 * Word-frequency count: read tokens with getword until it reports EOF, add
 * every token that starts with a letter to the tree, then print the tree.
 */
int main(void)
{
    struct tnode *root = NULL;
    char word[MAXWORD];

    while (getword(word, MAXWORD) != EOF) {
        /* <ctype.h> functions need an unsigned char value (or EOF). */
        if (isalpha((unsigned char)word[0])) {
            root = addtree(root, word);
        }
    }
    treeprint(root);
    return 0;
}

struct tnode *talloc(void);

//char *strdup(char *s);

/*
 * addtree: add the word w at node p or somewhere below it.
 *
 * A NULL p means w is new: a node holding a private copy of w is created with
 * a count of 1.  Otherwise w is compared with the node's word; an equal word
 * bumps the count, a smaller one goes to the left subtree and a larger one to
 * the right subtree.
 *
 * Returns the root of the updated subtree, which the caller stores back into
 * the link it followed.  If memory for a new node runs out, a message goes to
 * stderr and NULL is returned, so the existing tree is left intact and only
 * that word is dropped.
 */
struct tnode *addtree(struct tnode *p, char *w)
{
    int cond;

    if (p == NULL) {    /* a new word has arrived */
        size_t len = strlen(w) + 1;

        p = talloc();   /* make a new node */
        if (p == NULL) {
            fprintf(stderr, "addtree: out of memory, word dropped: %s\n", w);
            return NULL;
        }
        /* Copy with malloc/memcpy rather than strdup: strdup is not declared
           by <string.h> under strict ISO C before C23. */
        p->word = malloc(len);
        if (p->word == NULL) {
            free(p);
            fprintf(stderr, "addtree: out of memory, word dropped: %s\n", w);
            return NULL;
        }
        memcpy(p->word, w, len);
        p->count = 1;
        p->left = p->right = NULL;
    } else if ((cond = strcmp(w, p->word)) == 0) {
        p->count++;     /* repeated word */
    } else if (cond < 0) {   /* less than this node's word: left subtree */
        p->left = addtree(p->left, w);
    } else {                 /* greater than this node's word: right subtree */
        p->right = addtree(p->right, w);
    }
    return p;
}



/*
 * treeprint: print the tree rooted at p in order (left subtree, the node
 * itself, right subtree), one "count word" line per node.
 */
void treeprint(struct tnode *p)
{
    if (p == NULL)
        return;

    treeprint(p->left);
    /* The original format ended in "%\n", an invalid conversion. */
    printf("%6d %s\n", p->count, p->word);
    treeprint(p->right);
}

/*
 * getword: get the next word or character from the input.
 *
 * Leading white space is skipped.  A token that starts with a letter
 * continues through the following letters and digits; any other character is
 * a token on its own.  The token is stored in word as a NUL-terminated
 * string of at most lim-1 characters; when a word fills the buffer, reading
 * stops and the rest of it is left in the input.
 *
 * Returns EOF at end of input, the character itself for a single non-letter
 * token, and the first character of the word otherwise.  Also returns EOF,
 * without reading anything, when word is NULL or lim is too small to hold one
 * character plus the terminator.
 */
int getword(char *word, int lim)
{
    int c, getch(void);
    void ungetch(int);
    char *w = word;
    char *end;

    if (word == NULL || lim < 2)
        return EOF;
    end = word + lim - 1;   /* last slot, reserved for the terminator */

    while (isspace(c = getch()))
        ;
    if (c != EOF)
        *w++ = (char)c;
    if (!isalpha(c)) {
        *w = '\0';
        return c;
    }
    /* Classify and push back the int that was read, not the stored char, so
       EOF and bytes above 0x7f keep their value. */
    while (w < end) {
        c = getch();
        if (!isalnum(c)) {
            ungetch(c);
            break;
        }
        *w++ = (char)c;
    }
    *w = '\0';
    return word[0];
}

#define BUFSIZE 100

/*
 * Pushback stack shared by getch and ungetch.  The slots are int rather than
 * char so that EOF and byte values above 0x7f come back out unchanged.
 */
int buf[BUFSIZE];

int bufp=0;   /* next free position in buf */

/* getch: return the most recently pushed-back character if there is one,
   otherwise read a character from standard input. */
int getch(void)
{
    if (bufp > 0)
        return buf[--bufp];
    return getchar();
}

/* ungetch: push c back so that the next getch returns it.  When the stack is
   full the character is dropped and a diagnostic is written to stderr. */
void ungetch(int c)
{
    if (bufp >= BUFSIZE) {
        fprintf(stderr, "ungetch:toomany charactors\n");
        return;
    }
    buf[bufp++] = c;
}



#include <stdlib.h>

/* talloc: allocate storage for one tnode with malloc and return it; the
   result is whatever malloc returned, and the fields are not initialised. */
struct tnode *talloc(void)
{
    struct tnode *node = malloc(sizeof *node);

    return node;
}

节选自：http://www.cnblogs.com/DiaoCow/archive/2010/04/19/1715337.html

呵呵
相关阅读:
Java应用开发与实践
 大话存储：存储系统底层架构原理极限剖析(终极版)
Excel 2016公式与函数应用大全
 Excel高效办公应用技巧
 业务弯路池子
 一个现象，
为什么有时候进入这么多次，一次是 38次，一次是 114次，
恶心从判断开始，
but,
这两个的意思是不同的。。。
原文地址：https://www.cnblogs.com/gqtcgq/p/7247280.html