• 1st 英文文章词频统计


    英文文章词频统计:

    功能:统计一篇英文文章的单词总数及每个单词的出现频数并输出,之后按频数排序,输出频数前五的单词及其频数。

    实现方法:使用C语言,用fopen函数打开txt文件,fscanf函数逐个读入单词,结构体wordNode存储单词及其频数,以链表的形式连接在一起,最后使用归并排序按频数降序排列,输出频数最高的5个单词。

     头文件

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

     定义宏

    /* Status codes used by functions whose return type is `status`. */
    #define ERROR 1
    #define OK 0
    /* Size in bytes of every word buffer, including the terminating NUL. */
    #define WORD_LENGTH 250

     自定义数据类型

    /* Integer status code (see the ERROR / OK macros). */
    typedef int status;
    
    /* One entry of the singly linked word-frequency list. */
    typedef struct Node
    {
        char word[WORD_LENGTH];  /* the word text, stored as a C string */
        int time;                /* number of occurrences counted so far */
        struct Node *next;       /* following entry; NULL marks the end of the list */
    }wordNode;

     定义全局变量

    /* Head of the global word-frequency list; NULL while the list is empty. */
    wordNode *headNode = NULL;

     声明所有使用的函数

    /* Look up `word` in the global list, adding a node (and bumping *num) when absent. */
    wordNode *wordSearch(char *word,int *num);
    /* Increment the occurrence counter of `word`; returns ERROR when no node is available. */
    status wordCount(char *word,int *num);
    /* Print the number of distinct words (*num) followed by every word and its count. */
    void printCountList(int *num);
    /* Print at most the first five entries of the global list. */
    void PrintFirstFiveTimes();
    /* Merge-sort the list at *head by descending count. */
    void mergeSort(wordNode **head);
    /* Split the list `head` into a front half (*pre) and a back half (*next). */
    void FrontBackSplit(wordNode *head,wordNode **pre,wordNode **next);
    /* Normalize a token in place (lower-casing letters, dropping other characters). */
    void wordJob(char word[]);
    /* Merge two lists sorted by descending count into one and return its head. */
    wordNode *SortedMerge(wordNode *pre,wordNode *next);
    /* Free every node of the global list. */
    void release();

     主函数

    status main(int argc,char *argv[])
    {
        char temp[WORD_LENGTH];//定义用以临时存放单词的数组
        FILE *file;
        int count;
        int articleWordNum = 0;//定义统计结点个数的变量
        int *num = &articleWordNum;
        if((file = fopen("F:\zc\c\yjs\file.txt", "r")) == NULL)
        {
            printf("文件读取失败!");
            exit(1);
        }
        while((fscanf(file,"%s",temp))!= EOF)
        {
            wordJob(temp);
            count = wordCount(temp,num);
        }
        fclose(file);
        printf("
    输出所有单词的频数
    ");
        printCountList(num);
        printf("
    输出词频最高的5个词
    ");
        mergeSort(&headNode);              //排序
        PrintFirstFiveTimes();
        release();
        return 0;
    }

    查找单词所在结点并返回其地址

    /*
     * Find the node of the global list that stores `word`, creating it when
     * the word has not been seen yet.
     *
     * word: NUL-terminated word to look up.
     * num:  counter of distinct words; incremented when a node is created.
     *
     * A new node starts with a count of 0. It becomes the head when the list
     * is empty, otherwise it is linked directly after the current head.
     *
     * Returns the matching or newly created node, or NULL when memory for a
     * new node cannot be allocated.
     */
    wordNode *wordSearch(char *word,int *num)
    {
        wordNode *cur;
        wordNode *node;

        /* Compare at most the number of characters a node can store, so that
         * an overlong word matches the truncated copy stored for it. */
        for(cur = headNode; cur != NULL; cur = cur->next)
        {
            if(strncmp(cur->word, word, sizeof cur->word - 1) == 0)
            {
                return cur;
            }
        }

        node = malloc(sizeof *node);
        if(node == NULL)
        {
            return NULL;
        }
        /* Bounded copy: always NUL-terminated, never writes past node->word. */
        snprintf(node->word, sizeof node->word, "%s", word);
        node->time = 0;

        if(headNode == NULL)
        {
            /* First node: terminate the list explicitly. */
            node->next = NULL;
            headNode = node;
        }
        else
        {
            node->next = headNode->next;
            headNode->next = node;
        }
        *num += 1;
        return node;
    }

    进行词频统计

    /*
     * Count one occurrence of `word`.
     *
     * Obtains the word's node through wordSearch() (which may create it and
     * update *num) and increments its counter.
     *
     * Returns 0 on success, ERROR when wordSearch() yields no node.
     */
    status wordCount(char *word,int *num)
    {
        wordNode *hit = wordSearch(word, num);

        if(hit != NULL)
        {
            hit->time += 1;
            return 0;
        }
        return ERROR;
    }

    输出所有词频

    void printCountList(int *num)
    {
        if(headNode == NULL)
        {
            printf("该文件无内容!");
        }
        else
        {
            wordNode *preNode = headNode;
            printf("
    	总计 %d 
    ",*num);
            while(preNode != NULL)
            {
                printf("
    	%s:%d次
    ",preNode->word,preNode->time);
                preNode = preNode->next;
            }
        }
    }

    输出词频最高的5个词

    void PrintFirstFiveTimes()
    {
        if(headNode == NULL)
        {
            printf("该文件无内容!");
        }
        else
        {
            wordNode *preNode = headNode;
            int i = 1;
            while (preNode != NULL && i<=5)
            {
                printf("
    	%s:%d次
    ",preNode->word,preNode->time);
                preNode = preNode->next;
                i++;
            }
        }
    }

    对词频统计结果进行归并排序

    /*
     * Sort the list at *headnode with a top-down merge sort.
     *
     * The list is split into two halves, each half is sorted recursively and
     * the halves are merged with SortedMerge(); *headnode is updated to the
     * head of the merged list. Lists with fewer than two nodes are left as is.
     */
    void mergeSort(wordNode **headnode)
    {
        wordNode *list = *headnode;
        wordNode *front = NULL;
        wordNode *back = NULL;

        if(list != NULL && list->next != NULL)
        {
            FrontBackSplit(list, &front, &back);
            mergeSort(&front);
            mergeSort(&back);
            *headnode = SortedMerge(front, back);
        }
    }

    将链表拆分为前后两半

    /*
     * Split a list into a front half and a back half.
     *
     * source: head of the list to split.
     * pre:    receives the head of the front half (always `source`).
     * next:   receives the head of the back half, or NULL when the list has
     *         fewer than two nodes.
     *
     * One pointer advances two nodes for each single step of the other, so
     * the slower one stops at the last node of the front half; when the
     * length is odd the front half gets the extra node. The link between the
     * halves is cut.
     */
    void FrontBackSplit(wordNode *source,wordNode **pre,wordNode **next)
    {
        wordNode *mid;
        wordNode *runner;

        if(source == NULL || source->next == NULL)
        {
            *pre = source;
            *next = NULL;
            return;
        }

        mid = source;
        for(runner = source->next;
            runner != NULL && runner->next != NULL;
            runner = runner->next->next)
        {
            mid = mid->next;
        }

        *pre = source;
        *next = mid->next;
        mid->next = NULL;
    }

    按频数降序合并两个有序链表

    /*
     * Merge two lists that are each sorted by descending count.
     *
     * pre, next: heads of the two lists (either may be NULL).
     *
     * Nodes are relinked, not copied. On equal counts the node from `pre` is
     * taken first. The merge is iterative, so stack usage does not grow with
     * the length of the lists.
     *
     * Returns the head of the merged list (NULL when both inputs are NULL).
     */
    wordNode *SortedMerge(wordNode *pre,wordNode *next)
    {
        wordNode *result = NULL;
        wordNode **tail = &result;   /* link where the next node is attached */

        while(pre != NULL && next != NULL)
        {
            if(pre->time >= next->time)
            {
                *tail = pre;
                pre = pre->next;
            }
            else
            {
                *tail = next;
                next = next->next;
            }
            tail = &(*tail)->next;
        }

        /* At most one list still has nodes; append it unchanged. */
        *tail = (pre != NULL) ? pre : next;
        return result;
    }

    处理单词

    /*
     * Normalize a token in place.
     *
     * word: NUL-terminated, writable string.
     *
     * ASCII upper-case letters are converted to lower case and every
     * character that is not an ASCII letter is removed, wherever it occurs.
     * The result may be an empty string when the token contains no letters.
     */
    void wordJob(char word[])
    {
        size_t src;
        size_t dst = 0;              /* next write position of the kept text */

        /* Single pass with separate read and write positions: every character
         * is examined exactly once, so runs of non-letters are dropped
         * completely and letters following them are still lower-cased. */
        for(src = 0; word[src] != '\0'; src++)
        {
            char c = word[src];

            if(c >= 'A' && c <= 'Z')
            {
                c = (char)(c - 'A' + 'a');
            }
            if(c >= 'a' && c <= 'z')
            {
                word[dst++] = c;
            }
        }
        word[dst] = '\0';
    }

    释放所有结点内存

    /*
     * Free every node of the global list.
     *
     * Nodes are detached from the front one at a time; headNode is NULL when
     * the function returns.
     */
    void release()
    {
        wordNode *doomed;

        while(headNode != NULL)
        {
            doomed = headNode;
            headNode = doomed->next;
            free(doomed);
        }
    }

    git@git.coding.net:amberpass/Calculate_words.git

    https://git.coding.net/amberpass/Calculate_words.git

    程序运行结果

  • 相关阅读:
    matplotlib 进阶之origin and extent in imshow
    Momentum and NAG
    matplotlib 进阶之Tight Layout guide
    matplotlib 进阶之Constrained Layout Guide
    matplotlib 进阶之Customizing Figure Layouts Using GridSpec and Other Functions
    matplotlb 进阶之Styling with cycler
    matplotlib 进阶之Legend guide
    Django Admin Cookbook-10如何启用对计算字段的过滤
    Django Admin Cookbook-9如何启用对计算字段的排序
    Django Admin Cookbook-8如何在Django admin中优化查询
  • 原文地址:https://www.cnblogs.com/landscape/p/5845852.html
Copyright © 2020-2023  润新知