编码行数:850
编码时长:13h
发表博客数量:8
所学知识点:
统计文章中各单词出现的频率,并去掉常用单词。问题解决基本思路:先打开所要读取的文件:定义一个FileReader对象,用来初始化BufferedReader;new一个BufferedReader对象,将文件内容读取到缓存;再定义一个字符串缓存(StringBuilder),用来存放读到的字符串。然后按行读取,每一行读取的内容用append方法追加到字符串缓存的末尾,再用toString()将其转化为字符串,把".",",","?","!",":","‘","’","“","”","—",";","-"等标点符号替换成空格,再根据空格分割成一个个单词放进字符串数组中。这里用到Map:Map<String, Integer> map = new TreeMap<String, Integer>();两个类型参数分别代表出现的单词和其出现的次数。用循环遍历单词数组,先统一转化成小写,再调用自定义方法judgeNouse()去掉常见单词,用map.get方法查看其已出现的次数:如果返回null,说明该单词此前尚未出现,其count记为1;否则为重复出现,其count++;并
/**
 * A word paired with the number of times it occurs. Used as the list element
 * when the word-frequency table is sorted for output.
 */
class entity11 {
    String danci;
    int cishu;

    /**
     * @param zimu  the word
     * @param cishu how many times the word occurred
     */
    public entity11(String zimu, int cishu) {
        this.danci = zimu;
        this.cishu = cishu;
    }

    /** @return the word */
    public String getDanci() {
        return danci;
    }

    /** @return the occurrence count of the word */
    public int getCishu() {
        return cishu;
    }
}

/**
 * Counts how often each word occurs in {@code piao.txt}, ignoring the common
 * words listed one per line in {@code judge.txt}, and prints the words ordered
 * by descending occurrence count together with their relative frequency.
 */
public class DanciCollect {

    /** Article whose words are counted. */
    private static final String ARTICLE_FILE = "piao.txt";

    /** File read by {@link #judgeNouse(String)}; one word per line. */
    private static final String STOP_WORD_FILE = "judge.txt";

    /** Punctuation that is replaced by a space before the text is split into words. */
    private static final String[] PUNCTUATION =
            {".", ",", "?", "!", ":", "‘", "’", "“", "”", "—", ";", "-"};

    /**
     * Tells whether a word should be kept, i.e. is not listed in {@code judge.txt}.
     *
     * @param str the word to look up; compared with {@code equals} against each line
     * @return {@code false} if some line of {@code judge.txt} equals {@code str},
     *         {@code true} otherwise
     * @throws IOException if {@code judge.txt} cannot be opened or read
     */
    public static boolean judgeNouse(String str) throws IOException {
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(STOP_WORD_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (str.equals(line)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Lower-cases a string one {@code char} at a time with
     * {@link Character#toLowerCase(char)}.
     *
     * @param str the string to convert
     * @return a new string of the same length with every char lower-cased
     */
    public static String toLowerCase(String str) {
        char[] chars = str.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(chars[i]);
        }
        return new String(chars);
    }

    /**
     * Reads {@code piao.txt}, counts every word that is not listed in
     * {@code judge.txt}, and prints the total followed by one line per distinct
     * word (count and frequency), most frequent first. Words with equal counts
     * stay in alphabetical order.
     *
     * <p>A missing input file is reported with a stack trace and otherwise ignored.
     *
     * @throws IOException if reading either file fails after it was opened
     */
    public static void collect1() throws IOException {
        try {
            String text = readArticle();
            // Loaded once up front instead of re-reading judge.txt for every word.
            java.util.Set<String> stopWords = loadStopWords();

            for (String mark : PUNCTUATION) {
                text = text.replace(mark, " ");
            }

            Map<String, Integer> counts = new TreeMap<>();
            int total = 0;
            // Split on whitespace runs so repeated spaces and tabs do not produce empty "words".
            for (String token : text.split("\\s+")) {
                if (token.isEmpty()) {
                    continue; // leading whitespace yields one empty first token
                }
                String word = toLowerCase(token);
                // Common words are dropped; only the remaining words are counted.
                // NOTE(review): assumes judge.txt holds the common words to exclude — confirm.
                if (stopWords.contains(word)) {
                    continue;
                }
                total++;
                counts.merge(word, 1, Integer::sum);
            }

            List<entity11> result = new ArrayList<>();
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                result.add(new entity11(entry.getKey(), entry.getValue()));
            }
            // Integer.compare avoids the overflow risk of subtracting the counts.
            result.sort((e1, e2) -> Integer.compare(e2.getCishu(), e1.getCishu()));

            System.out.println("文章共计" + total + "个单词");
            for (entity11 item : result) {
                System.out.println(item.getDanci() + "在文章中出现" + item.getCishu()
                        + "次,其频率为" + String.format("%.2f", item.getCishu() * 1.0 / total));
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /** Reads the whole article, joining its lines with a space so words on adjacent lines stay separate. */
    private static String readArticle() throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(ARTICLE_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append(' ');
            }
        }
        return content.toString();
    }

    /** Loads every line of judge.txt into a set, matching the exact-line comparison of judgeNouse. */
    private static java.util.Set<String> loadStopWords() throws IOException {
        java.util.Set<String> words = new java.util.HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(STOP_WORD_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line);
            }
        }
        return words;
    }

    /**
     * Program entry point: runs the word-frequency report.
     *
     * @param args unused
     * @throws IOException if reading an input file fails
     */
    public static void main(String args[]) throws IOException {
        // collect1() already handles FileNotFoundException itself, so no extra catch is needed here.
        DanciCollect.collect1();
    }
}
且用map.put()将单词字符串及其出现次数count存入map对象;最后将map转化成List集合,按出现次数从高到低排序输出。源代码如下: