• 给定一个英文原文,统计文件里面一共有多少个不同的英文单词


    wordsCounter.cpp

    // wordsCounter.cpp : Defines the entry point for the console application.
    //

    #include "stdafx.h"
    #include "wordsCounter.h"

    #ifdef _DEBUG
    #define new DEBUG_NEW
    #undef THIS_FILE
    static char THIS_FILE[] = __FILE__;
    #endif

    /////////////////////////////////////////////////////////////////////////////
    // The one and only application object

    CWinApp theApp;

    using namespace std;

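    // Purpose:
    // Given an English text file, count how many distinct English words it contains (C++ suggested; mind efficiency).
    // Notes:
    // Chinese text is not supported; words of 100 or more characters are not supported (the word buffers hold at most 99 characters plus the terminator).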

    // ASCII-only character classification built from plain range comparisons,
    // so any byte outside 'A'-'Z', 'a'-'z' and '0'-'9' classifies as "not alphanumeric".
    // Arguments are fully parenthesized so that expression arguments
    // (e.g. a conditional expression) are classified correctly.
    // Each macro evaluates its argument more than once: do not pass
    // expressions with side effects such as *p++.
    #define ISUPPER(x) ((x)>='A' && (x)<='Z')
    #define ISLOWER(x) ((x)>='a' && (x)<='z')
    #define ISNUM(x) ((x)>='0' && (x)<='9')
    // NOTE(review): this keeps the lower-case name isalnum because the rest of the
    // file calls it; presumably it shadows the <ctype.h> function of the same name
    // from this point on - confirm that is intended.
    #define isalnum(x) (ISUPPER(x) || ISLOWER(x) || ISNUM(x))

    // Guard macro: when `x` is false, prints "ERROR IN <text of x>" to stdout and
    // returns from the enclosing function, so it is only usable in functions
    // returning void.
    // The do/while(0) wrapper makes `CHECK(x);` a single statement, so it can sit
    // directly under an if/else. The message goes through "%s" so that a '%' in
    // the checked expression is never interpreted as a printf conversion.
    #define CHECK(x) do{if(!(x)){printf("%s","ERROR IN " #x);return;}}while(0)

    // One entry of the singly linked list of distinct words.
    struct NODE
    {
    // Next entry in the list; 0 marks the end of the list.
    NODE *next;
    // The word as a NUL-terminated string (at most 99 characters fit).
    char text[100];
    // Number of occurrences recorded for this word.
    int num;
    };

    // Records one occurrence of `text` in the word list headed by `root`.
    //
    // The list is searched with a case-insensitive comparison. If the word is
    // already present its counter is incremented; otherwise a new node with a
    // count of 1 is appended at the tail (so the list keeps first-seen order and
    // stores the spelling of the first occurrence).
    //
    // root - head of the list; updated when the first word is inserted.
    // text - NUL-terminated word. A null pointer, or a word that does not fit in
    //        NODE::text, is reported through CHECK and ignored.
    void InsertWord(NODE *&root,char *text)
    {
        CHECK(text);
        // NODE::text is a fixed-size buffer: refuse anything that would overflow it
        // instead of trusting every caller to have checked the length.
        CHECK(strlen(text)<sizeof(root->text));

        // Walk the list through a pointer to the current link. When the loop ends
        // without a match, `link` addresses the slot to fill (either `root` itself
        // for an empty list or the last node's `next`), so no special case is needed.
        NODE **link=&root;
        while(*link)
        {
            if(stricmp((*link)->text,text)==0)
            {
                (*link)->num++;
                return;
            }
            link=&(*link)->next;
        }

        NODE *node=new NODE;
        strcpy(node->text,text);
        node->num=1;
        node->next=0;
        *link=node;
    }

    // Splits `str` into words and records each one in the list `root`.
    //
    // A word is a maximal run of ASCII letters and digits (as classified by the
    // file's isalnum macro); every other character is a separator.
    //
    // root - head of the word list, passed on to InsertWord.
    // str  - NUL-terminated input text; a null pointer is reported through CHECK.
    //
    // A word too long for the local buffer is reported and skipped, and parsing
    // continues with the next word, so one over-long token no longer discards the
    // remainder of the input.
    void Parse(NODE *&root,char *str)
    {
        CHECK(str);

        char *p1=str;//start of the current word
        char word[100];

        while(*p1)
        {
            // skip separators up to the start of the next word
            while (*p1 && !isalnum(*p1))
                p1++;

            if(!*p1)
                break;

            // p1 is on an alphanumeric character here, so the word is never empty
            char *p2=p1;//one past the end of the current word
            while (*p2 && isalnum(*p2))
                p2++;

            if(p2-p1<(int)sizeof(word))
            {
                strncpy(word,p1,p2-p1);
                word[p2-p1]=0;//strncpy does not terminate when it copies exactly p2-p1 chars
                InsertWord(root,word);
            }
            else
            {
                // same message the former CHECK(p2-p1<100) produced
                printf("ERROR IN p2-p1<100");
            }

            p1=p2;
        }
    }

    // Prints every list entry as "<word> <count> " and then a summary with the
    // total number of words, the number of distinct words and the most frequent
    // word (the earliest one in the list wins a tie).
    // An empty list is reported through CHECK and nothing else is printed.
    void PrintNodes(NODE *root)
    {
        CHECK(root);

        int total=0;
        int distinct=0;
        // Start with the head as the best candidate; only a strictly larger
        // count replaces it.
        NODE *best=root;

        for(NODE *cur=root;cur;cur=cur->next)
        {
            total+=cur->num;
            ++distinct;

            if(best->num < cur->num)
                best=cur;

            printf("%s %d ",cur->text,cur->num);
        }

        printf("共%d个单词,共%d个不同单词,出现最多的单词是 %s [%d] ",total,distinct,best->text,best->num);
    }

    // Deletes every node of the list and leaves `root` null.
    // Calling it on an empty list does nothing.
    void ReleaseLinks(NODE *&root)
    {
        while(root)
        {
            // remember the successor before the current node is destroyed
            NODE *following=root->next;
            delete root;
            root=following;
        }
    }

    // Console entry point: initializes MFC, prints the IDS_HELLO string, then
    // reads the file "wordscounter.cpp" from the current directory, counts its
    // words and prints the statistics.
    void _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
    {
        // Kept from the generated skeleton; the function is void, so the value
        // is never returned to the caller.
        int nRetCode = 0;

        // initialize MFC and print and error on failure
        if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
        {
            // TODO: change error code to suit your needs
            cerr << _T("Fatal Error: MFC initialization failed") << endl;
            nRetCode = 1;
            return;
        }

        // TODO: code your application's behavior here.
        CString strHello;
        strHello.LoadString(IDS_HELLO);
        cout << (LPCTSTR)strHello << endl;

        FILE *fp=fopen("wordscounter.cpp","r");
        CHECK(fp);

        // The file length is used as an upper bound for the buffer size.
        fseek(fp,0,SEEK_END);
        long len=ftell(fp);
        fseek(fp,0,SEEK_SET);

        char *str=0;
        if(len>0)
        {
            str=new char[len+1];
            // Element size 1 makes fread return the number of bytes actually read.
            // The stream is opened in text mode, so this can be smaller than len;
            // terminating at that count keeps uninitialized bytes out of the parse.
            size_t got=fread(str,1,len,fp);
            str[got]=0;
        }

        // Close the file before any further early return so the handle never leaks.
        fclose(fp);
        CHECK(len>0);

        NODE *root=0;
        Parse(root,str);
        PrintNodes(root);

        ReleaseLinks(root);
        delete []str;

        return ;
    }

  • 相关阅读:
    CRM SFA Determine the Type of Claim Rule Template to Use
    Log4j 打印堆栈信息
    树查找 二分法
    CRM 公海 领取规则 策略
    【设计模式】策略模式与状态模式
    Alibaba crm
    CRM easy rule & Resource Duplicate Detection
    CRM 线索分配
    SAAS CRM SFA 线索 分配
    SOFA & COLA 企业应用框架 & 代码精进
  • 原文地址:https://www.cnblogs.com/timssd/p/4160726.html
Copyright © 2020-2023  润新知