• 单词统计


           这次课堂测试总的来说收获还是蛮大的,用到了读取文件,虽然文件类型的测试也做了不少了,但确实是不够熟练,对文件这一块了解太少,其中一些基本的函数都不知道。所以还是要多学习学习这方面的知识。

           这次实验大概用了4个小时,花费的时间与其他同学相比有点多,主要原因是总想从网上找模板,自己不亲自动手,但是课下自己亲手实践,确实是从中学到了东西,所以浪费的时间还是值得的。

           下面是这次练习的代码:

           

    package xxx;
    /* yr
     * 2019/4/28
     */
    import java.io.*;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Scanner;
    /**
     * Console tool that prints text statistics for a file or for every file below a directory:
     * the relative frequency of each English letter and the most frequent words.
     *
     * <p>Only the ASCII letters {@code a-z} / {@code A-Z} are treated as letters; upper and lower
     * case are merged. Files are decoded with the platform default charset.
     */
    public class Word {

        /** Number of letters in the English alphabet. */
        private static final int LETTERS = 26;

        /**
         * Reader for console input, created on first use. A single instance is shared by every
         * prompt issued through this object so that input already buffered by the scanner is not
         * lost between prompts (which happens when several scanners wrap {@code System.in}).
         */
        private Scanner console;

        /**
         * Prints the frequency (as a percentage with two decimals) of each of the 26 English
         * letters in the given file, case-insensitively, ordered from most to least frequent.
         * Letters with equal frequency keep alphabetical order.
         *
         * <p>If the file cannot be read, the stack trace is printed and no statistics are shown.
         *
         * @param string path of the file to analyze
         */
        public void check1(String string) {
            int[] count = new int[LETTERS];   // occurrences of each letter, index 0 = 'a'
            long total = 0;

            // Stream the file character by character: no limit on line count or file size.
            try (BufferedReader br = new BufferedReader(new FileReader(string))) {
                int ch;
                while ((ch = br.read()) != -1) {
                    int idx = letterIndex(ch);
                    if (idx >= 0) {
                        count[idx]++;
                        total++;
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
                return;
            }

            System.out.println("短文中各字母出现情况统计如下:");

            // Percentage rounded to two decimals; a file without letters reports 0.0 everywhere.
            final double[] percent = new double[LETTERS];
            for (int i = 0; i < LETTERS; i++) {
                percent[i] = (total == 0)
                        ? 0.0
                        : Math.round((double) count[i] / total * 10000) / 100.0;
            }

            // Sort letter indexes by descending percentage. Collections.sort is stable, so ties
            // remain in alphabetical order.
            List<Integer> order = new ArrayList<Integer>(LETTERS);
            for (int i = 0; i < LETTERS; i++) {
                order.add(i);
            }
            Collections.sort(order, new Comparator<Integer>() {
                @Override
                public int compare(Integer left, Integer right) {
                    return Double.compare(percent[right], percent[left]);
                }
            });

            for (int idx : order) {
                System.out.println((char) ('a' + idx) + "的频率为:" + percent[idx] + "%");
            }
        }

        /**
         * Counts how often each word occurs in the given file (case-insensitively), asks on the
         * console how many entries to show, and prints that many {@code word:count} lines ordered
         * by descending count and, for equal counts, alphabetically.
         *
         * <p>A word is a maximal run of ASCII letters; every other character, including line
         * breaks and parentheses, separates words. The requested number is clamped to the range
         * {@code [0, number of distinct words]}; if no valid integer is entered, all words are
         * shown. If the file cannot be read, the stack trace is printed and nothing else happens.
         *
         * @param string path of the file to analyze
         */
        public void check2(String string) {
            Map<String, Integer> counts = new HashMap<String, Integer>();

            try (BufferedReader br = new BufferedReader(new FileReader(string))) {
                StringBuilder word = new StringBuilder();
                int ch;
                while ((ch = br.read()) != -1) {
                    int idx = letterIndex(ch);
                    if (idx >= 0) {
                        word.append((char) ('a' + idx));   // store in lower case
                    } else {
                        addWord(counts, word);             // any non-letter ends the current word
                    }
                }
                addWord(counts, word);                     // word that runs up to end of file
            } catch (IOException e) {
                e.printStackTrace();
                return;
            }

            // Rank: highest count first, alphabetical among equal counts.
            List<Map.Entry<String, Integer>> ranked =
                    new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
            Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    int byCount = right.getValue().compareTo(left.getValue());
                    if (byCount != 0) {
                        return byCount;
                    }
                    return left.getKey().compareTo(right.getKey());
                }
            });

            int n = ranked.size();
            System.out.println("请输入你要统计前多少个单词(<"+n+")");
            int wanted = readCount(n);
            for (int i = 0; i < wanted; i++) {
                Map.Entry<String, Integer> entry = ranked.get(i);
                System.out.println(entry.getKey() + ":" + entry.getValue());
            }
        }

        /**
         * Runs {@link #check1(String)} and {@link #check2(String)} on the given file, or on every
         * regular file below the given directory.
         *
         * <p>For a directory, each sub-directory is printed and processed recursively as it is
         * encountered; the regular files directly inside the directory are processed afterwards.
         * A path that is neither a readable directory nor a regular file is ignored.
         *
         * @param fileDir path of a file or directory
         */
        public void test(String fileDir) {
            File root = new File(fileDir);
            if (root.isFile()) {
                analyze(root);
                return;
            }

            File[] files = root.listFiles();
            if (files == null) {   // not a directory, or it could not be listed
                return;
            }

            List<File> fileList = new ArrayList<File>();
            for (File f : files) {
                if (f.isFile()) {
                    fileList.add(f);
                } else if (f.isDirectory()) {
                    System.out.println(f.getAbsolutePath());
                    test(f.getAbsolutePath());
                }
            }

            for (File f : fileList) {
                analyze(f);
            }
        }

        /**
         * Program entry point: reads a file or directory path from the console (the whole line,
         * so paths containing spaces work) and prints the statistics for it.
         *
         * @param args unused
         */
        public static void main(String args[]) {
            Word word = new Word();
            System.out.println("请输入要查询的文件目录如(D:\\大二下\\软件工程\\wendang\\word.txt)");
            String str = word.console().nextLine().trim();
            System.out.println(str);
            word.test(str);
        }

        /** Prints the path of one file and runs both statistics on it. */
        private void analyze(File file) {
            // File.getPath() uses the platform separator, unlike a hard-coded "\\".
            String path = file.getPath();
            System.out.println(path);
            check1(path);
            check2(path);
        }

        /** Returns the shared console scanner, creating it on first use. */
        private Scanner console() {
            if (console == null) {
                console = new Scanner(System.in);
            }
            return console;
        }

        /** Reads the requested entry count from the console, clamped to {@code [0, max]}. */
        private int readCount(int max) {
            Scanner in = console();
            if (!in.hasNextInt()) {
                if (in.hasNext()) {
                    in.next();     // discard the token that is not an integer
                }
                return max;        // no usable number: show everything
            }
            int requested = in.nextInt();
            return Math.max(0, Math.min(requested, max));
        }

        /** Returns 0-25 for an ASCII letter of either case, or -1 for any other character. */
        private static int letterIndex(int ch) {
            if (ch >= 'a' && ch <= 'z') {
                return ch - 'a';
            }
            if (ch >= 'A' && ch <= 'Z') {
                return ch - 'A';
            }
            return -1;
        }

        /** Adds the buffered word (if any) to the tally and clears the buffer. */
        private static void addWord(Map<String, Integer> counts, StringBuilder word) {
            if (word.length() == 0) {
                return;
            }
            String key = word.toString();
            Integer seen = counts.get(key);
            counts.put(key, seen == null ? 1 : seen + 1);
            word.setLength(0);
        }
    }
     
  • 相关阅读:
    LeetCode Count of Range Sum
    LeetCode 158. Read N Characters Given Read4 II
    LeetCode 157. Read N Characters Given Read4
    LeetCode 317. Shortest Distance from All Buildings
    LeetCode Smallest Rectangle Enclosing Black Pixels
    LeetCode 315. Count of Smaller Numbers After Self
    LeetCode 332. Reconstruct Itinerary
    LeetCode 310. Minimum Height Trees
    LeetCode 163. Missing Ranges
    LeetCode Verify Preorder Serialization of a Binary Tree
  • 原文地址:https://www.cnblogs.com/qianmo123/p/10786850.html
Copyright © 2020-2023  润新知