• C++学习之IO流


    本博文主要介绍IO流中fstream,iterator的简单应用。

    问题描述:

    a):找一篇文章(The Holy Bible),将其所有的单词输入vector中,并统计其单词个数

    b):增加统计词频功能,用结构体实现(word ,count) ,将结构体存入vector中

    c):增加停用词功能。

      1):从网上找 英文停用词,并命名为 stop_list.txt;

      2):统计词频时,若遇到此类词直接略过。

    d):计算上述步骤花费的时间,具体有:

      1):读取文件时间;

      2):排序所用时间;

      3):打印所用时间。

    代码如下:

      1 #include <iostream>
      2 #include <string>
      3 #include <vector>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 #include <algorithm>
      8 #include <fstream>
      9 #include <ctype.h>
     10 #include <sys/time.h>
     11 #include <stdexcept>
     12 using namespace std ;
     13 
     // One dictionary entry: a word together with its occurrence count.
     struct Word
     {
         std::string word_;   // the word text, used as the lookup key
         int         count_;  // how many times word_ has been seen
     };
     19 //readfile to vec
     20 void ReadFile(ifstream &in, vector<Word> &vec, const vector<string>&stop );
     21 //readstoplist to stop
     22 void ReadStopList( ifstream& in , vector<string> &stop);
     23 //judge s is a stopword or not
     24 bool IsStopWord(const vector<string>&stop , const string s );
     25 //treans s to lower one
     26 void stringToLower(string&s);
     27 //judge s include punct or not
     28 void ErasePunct(string &s);
     29 //judge vec include s or not & add word to vec 
     30 void AddWordToDict(vector<Word> &vec ,const string s);
     31 //sort word by frequency
     32 void WordSort(vector<Word> &vec );
     33 //print frequency of a word
     34 void printFrequency( const vector<Word>&vec ) ;
     35 //calculate runing_time
     36 int64_t getTime();
     37 int main(int argc, const char *argv[])
     38 {
     39     // readfile -->store  to vec --->sort ---> calculate count ---> print
     40     if(argc < 2)
     41     {
     42         perror("Usage:exe srcfile");
     43         exit(EXIT_FAILURE);
     44     }
     45     vector<Word> vec ;
     46     
     47     vector<string> stopList ;
     48     
     49     ifstream infile(argv[1]);
     50     ifstream stopfile("stop_list.txt");
     51     if( ! stopfile )
     52         throw std::runtime_error("stop_list does not exist!");
     53     if( !infile )
     54         throw std::runtime_error("open file failure");
     55     
     56     int64_t starttime = getTime();
     57     ReadStopList(stopfile , stopList);
     58     ReadFile( infile, vec ,stopList );
     59     int64_t readtime = getTime();
     60     
     61     WordSort( vec );
     62 
     63     int64_t sorttime = getTime();
     64     printFrequency( vec ) ;
     65     
     66     int printtime = getTime();
     67     cout << vec.size() << endl ;
     68     cout << "the time of reading file :" << (readtime - starttime) << endl ;
     69     cout << "the time of  sorting: "  << (sorttime - readtime) << endl ;
     70     cout<<"the time of printing"<< (printtime - sorttime) << endl ;
     71     infile.close();
     72     return 0;
     73 }
     74 
     75 //dos2unix command : window-->unix  //将windows文档格式转化成unix文档格式
     76 void ReadFile(ifstream &in, 
     77               vector<Word> &vec,
     78               const vector<string>&stop )
     79 {
     80     vec.clear();
     81     string s ;
     82     while( in >> s)
     83     {
     84         ErasePunct(s); // judge s dose include punct or not
     85         stringToLower(s);
     86         if(! IsStopWord(stop ,s))
     87        {
     88            AddWordToDict(vec , s);
     89        }
     90     }
     91 }
     92 
     93 void ReadStopList( ifstream& in , vector<string> &stop)
     94 {
     95     stop.clear();
     96     string s ;
     97     while( in >> s)
     98     {
     99        stop.push_back( s );
    100     }
    101 }
    // Converts every uppercase character of `s` to lowercase, in place.
    // Characters that isupper() does not classify as uppercase are left as is.
    void stringToLower(std::string &s)
    {
        for (std::string::iterator it = s.begin(); it != s.end(); ++it)
        {
            // The <ctype.h> functions require a value representable as
            // unsigned char (or EOF); a plain char may be negative for
            // non-ASCII bytes, which would be undefined behaviour.
            const unsigned char ch = static_cast<unsigned char>(*it);
            if (isupper(ch))
                *it = static_cast<char>(tolower(ch));
        }
    }
    112 
    113 
    // Returns true when `s` is equal to one of the entries of `stop`,
    // false otherwise (including when `stop` is empty).
    // The comparison is exact: no case folding or trimming is done here.
    bool IsStopWord(const std::vector<std::string> &stop, const std::string s)
    {
        // std::find gives a single unconditional return; the hand-written
        // loop ended in a conditional return that the compiler could not
        // prove to be exhaustive.
        return std::find(stop.begin(), stop.end(), s) != stop.end();
    }
    135 
    // True when `c` is a punctuation character according to ispunct().
    // The cast is required: passing a negative char to ispunct() is
    // undefined behaviour.
    static bool IsPunctChar(char c)
    {
        return ispunct(static_cast<unsigned char>(c)) != 0;
    }

    // Removes every punctuation character from `s`, in place, keeping the
    // remaining characters in their original order. `s` may end up empty.
    void ErasePunct(std::string &s)
    {
        // erase/remove_if compacts the string in one pass instead of
        // shifting the tail on every single erase.
        s.erase(std::remove_if(s.begin(), s.end(), IsPunctChar), s.end());
    }
    147 
    148 void AddWordToDict(vector<Word> &vec ,const string s) 
    149 {
    150     vector<Word>::iterator it = vec.begin();////顺序查找
    151        while( it != vec.end())
    152     {
    153         if(it->word_ == s)
    154         {
    155             (it->count_) ++ ;
    156             break ;
    157         }
    158         ++ it ;
    159     }
    160     if(it == vec.end()) // 类似于链表操作
    161     {
    162         Word tmp ;
    163         tmp.word_ = s ;
    164         tmp.count_ = 1 ;
    165         vec.push_back(tmp);
    166     }
    167 }
    168 
    169 int tmp(const Word &w1 , const Word &w2)
    170 {
    171     // a > b
    172     return (w1.count_ > w2.count_);
    173 }
    174 void WordSort(vector<Word> &vec )
    175 {
    176     sort(vec.begin() , vec.end() ,tmp ); //库函数sort
    177 }
    178 
    179 void printFrequency( const vector<Word>&vec ) 
    180 {
    181     for(vector<Word>::const_iterator it = vec.begin(); //注意此处的const
    182         it != vec.end();
    183         ++ it)
    184      printf("word :%s, frequency: %d
    ", it->word_.c_str(), it->count_);
    185 int64_t getTime();
    186 }
    187 
    188 
    // Returns the current time of day as reported by gettimeofday(),
    // expressed in microseconds.
    // Throws std::runtime_error when gettimeofday() fails.
    int64_t getTime()
    {
        struct timeval tm;
        memset(&tm, 0, sizeof(tm));
        if (-1 == gettimeofday(&tm, NULL))
            throw std::runtime_error("gettime failure");

        // Widen before multiplying: tv_sec * 1000 * 1000 evaluated in a
        // 32-bit time_t would overflow.
        int64_t t = static_cast<int64_t>(tm.tv_sec) * 1000 * 1000;
        t += tm.tv_usec;
        return t;
    }

    本程序时间复杂度为O(n*n),后续文章我们将讨论时间复杂度更优的算法。

  • 相关阅读:
    内部类与外部类的调用
    Docker学习(十二)中遇到的一些问题汇总
    Docker学习(十一)Docker系列结束-新的开始K8S
    Docker学习(十)Docker容器编排 Docker-compose
    Docker学习(九)Volumn容器间共享数据
    Docker学习(八)容器间单向通信
    Docker学习(七)实战
    Docker学习(六)Dockerfile构建自定义镜像
    Docker学习(五) Dockerfile基础命令
    Docker学习(四)Docker搭建Tomcat
  • 原文地址:https://www.cnblogs.com/xfxu/p/3982607.html
Copyright © 2020-2023  润新知