首先,我在网上找了一个WordCount的源码,在源码上进行修改
原文博主:波涛汹涌
原文链接:http://blog.sina.com.cn/s/blog_51335a000101ouzf.html
原文代码:
wordCountDemo
原文的代码实现了读取文件、统计所有出现过的单词(不重复),并统计、打印每个单词出现的次数。但原文代码在导入包时有两处错误,而且打印内容存在重复,输出不够美观。
修改后的代码:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;

/**
 * Reads a text file, counts how often each word occurs, and reports the
 * per-word frequencies together with the total number of words, non-space
 * characters and lines. The report is printed to standard output and also
 * written to a result file.
 *
 * <p>Words are the non-empty pieces obtained by splitting each line on a
 * space, a comma or a period. Files are read and written with the platform
 * default charset, exactly as {@link FileReader} and {@link FileWriter} do.
 *
 * @author 张军洪
 * original author: 波涛汹涌
 * http://blog.sina.com.cn/s/blog_51335a000101ouzf.html
 */
public class WordCount {

    /** Source file used when no path is passed on the command line. */
    private static final String DEFAULT_PATH_SRC = "C:/Users/张军洪/Desktop/shenma.txt";

    /** Result file used when no path is passed on the command line. */
    private static final String DEFAULT_PATH_RESULT = "C:/Users/张军洪/Desktop/shenma_result.txt";

    /** Word delimiters: a single space, a comma or a period. Compiled once instead of per line. */
    private static final Pattern WORD_DELIMITER = Pattern.compile(" |,|\\.");

    private static String path_src = DEFAULT_PATH_SRC;
    private static String path_result = DEFAULT_PATH_RESULT;

    /** Writer for the result file; non-null only while {@link #printMap(Map)} is running. */
    private static BufferedWriter bw = null;

    /** Every word read from the source file, in reading order, duplicates included. */
    private static List<String> word_list = new ArrayList<String>();

    /** Total number of words, i.e. the sum of all per-word frequencies. */
    static int countWord = 0;

    /** Number of characters that are not a space; line terminators are not counted. */
    static int countChar = 0;

    /** Number of lines in the source file. */
    static int countLine = 0;

    /** Scratch field of the earlier implementation; kept only so existing references still compile. */
    static String s = "";

    /** The file content with every line followed by a single space. */
    static String strCount = "";

    /**
     * Runs the word count.
     *
     * @param args optional; {@code args[0]} overrides the source file path and
     *             {@code args[1]} overrides the result file path. Missing
     *             entries fall back to the built-in default paths.
     */
    public static void main(String[] args) {
        // Re-resolve both paths on every call so an earlier call's arguments never leak into this one.
        path_src = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH_SRC;
        path_result = (args != null && args.length > 1) ? args[1] : DEFAULT_PATH_RESULT;

        File file = new File(path_src);
        if (!file.exists()) {
            System.out.println("file " + file + " is not existed, exit");
            return;
        }

        try {
            readAndCount(file);

            // Echo every word in reading order, then a blank separator line.
            for (String word : word_list) {
                System.out.println(word);
            }
            System.out.println();

            printMap(countFrequencies(word_list));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the file once, filling {@code word_list} and the three counters.
     * All state is reset first, so repeated runs do not accumulate.
     */
    private static void readAndCount(File file) throws IOException {
        word_list.clear();
        countWord = 0;
        countChar = 0;
        countLine = 0;

        StringBuilder text = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                countLine++;
                text.append(line).append(' ');

                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) != ' ') {
                        countChar++;
                    }
                }

                // Adjacent delimiters and blank lines yield empty tokens; those are not words.
                for (String token : WORD_DELIMITER.split(line)) {
                    if (!token.isEmpty()) {
                        word_list.add(token);
                        countWord++;
                    }
                }
            }
        } finally {
            closeInputStream(reader);
        }
        strCount = text.toString();
    }

    /** Returns the number of occurrences of each word, sorted by word. */
    private static TreeMap<String, Integer> countFrequencies(List<String> words) {
        TreeMap<String, Integer> frequencies = new TreeMap<String, Integer>();
        for (String word : words) {
            Integer count = frequencies.get(word);
            frequencies.put(word, (count == null) ? 1 : count + 1);
        }
        return frequencies;
    }

    /**
     * Prints every entry of {@code map} followed by the word, character and
     * line totals, and writes the same data to the result file. In the file the
     * three totals are written as bare numbers, one per line.
     *
     * @param map word frequencies, reported in the map's iteration order
     * @throws IOException if the result file cannot be opened or written
     */
    public static void printMap(Map<String, Integer> map) throws IOException {
        bw = new BufferedWriter(new FileWriter(path_result));
        try {
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                String line = "word: " + entry.getKey() + ", times: " + entry.getValue();
                System.out.println(line);
                writeResult(line);
            }
            System.out.println();
            System.out.println("单词数目:" + countWord);
            writeResult(String.valueOf(countWord));
            System.out.println("字符数目:" + countChar);
            writeResult(String.valueOf(countChar));
            System.out.println("行数:" + countLine);
            writeResult(String.valueOf(countLine));

            // Flush here so a write failure reaches the caller; close() below only logs errors.
            bw.flush();
        } finally {
            closeOutputStream(bw);
            bw = null;
        }
    }

    /**
     * Appends one line to the result file. Does nothing when no result file is
     * currently open.
     *
     * @param line text to write; a line separator is appended
     * @throws IOException if the write fails
     */
    public static void writeResult(String line) throws IOException {
        if (bw != null) {
            bw.write(line);
            bw.newLine();
        }
    }

    /**
     * Closes {@code writer} if it is not null; a failure to close is printed
     * to standard error and not rethrown.
     */
    public static void closeOutputStream(BufferedWriter writer) {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes {@code reader} if it is not null; a failure to close is printed
     * to standard error and not rethrown.
     */
    public static void closeInputStream(BufferedReader reader) {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
修改后的代码在原有功能的基础上,增加了统计单词数目、字符数目和文件行数的功能,并优化了输出打印方式,使结果更便于查看。