本周根据杨老师的spec对英语文章词频统计进行了改进
1.需求分析:
对英文文章中的英文单词进行词频统计,并按照出现次数由大到小的顺序输出。
2.算法思想:
(1)构建一个类用于存放英文单词及其出现的次数
/** Pairs an English word with the number of times it has occurred. */
class WordAndNum {
    /** The word itself. */
    public String word;

    /** How many times {@code word} has been counted. */
    public int num;
}
(2)从txt中获取字符串
// The backslash in the path must be escaped: inside a Java string literal "\t" is a TAB character,
// so "D:\test.txt" would not name the intended file.
BufferedReader in = new BufferedReader(new FileReader("D:\\test.txt"));
String line = null;
// readLine() returns null once the end of the file is reached, which ends the loop.
while ((line = in.readLine()) != null) {
    // each line is split into words here, as shown in step (3)
}
(3)将字符串中的单词截取出来存到ArrayList中
ArrayList<WordAndNum> list=new ArrayList<WordAndNum>();
int index1=0; int index2 = 0; //截取英文单词 index1=line.indexOf(" "); String word=line.substring(0,index1); WordAndNum wdn=new WordAndNum(); wdn.word=word; wdn.num=1; list.add(wdn); for(int i=index1+1;i<line.length();i++){ if(line.charAt(i)==','||line.charAt(i)=='.'||line.charAt(i)==';'||line.charAt(i)==' '||line.charAt(i)=='?'){ index2=i; WordAndNum wdn2=new WordAndNum(); word=line.substring(index1+1,index2); wdn2.word=word; boolean flag=falsefor(int j=0;j<list.size();j++){ if(list.get(j).word.equals(word)){ wdn2.num=list.get(j).num+1; list.set(j, wdn2); flag=true; break; } } if(flag==false){ wdn2.num=1; list.add(wdn2); } index1=index2; } }
(4)对ArrayList进行冒泡排序
//冒泡排序 for(int i=0;i<list.size();i++){ for(int j=0;j<list.size()-i-1;j++){ if(list.get(j).num<list.get(j+1).num){ WordAndNum wd1=list.get(j); WordAndNum wd2=list.get(j+1); list.set(j, wd2); list.set(j+1, wd1); } } }
3.具体代码
package ruan.jian.gong.cheng;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

/** Pairs an English word with the number of times it has occurred. */
class WordAndNum {
    public String word;
    public int num;
}

/**
 * Counts how often each word occurs in a text file and prints the words from the most
 * frequent to the least frequent, one {@code word===count} pair per line.
 */
public class wordNum2 {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D:\\test.txt";

    /** Characters that end a word; every other character is treated as part of a word. */
    private static final String DELIMITERS = ",.;? ";

    /**
     * Program entry point.
     *
     * @param args optional; when present, {@code args[0]} is the path of the file to analyse,
     *             otherwise {@link #DEFAULT_PATH} is read
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        ArrayList<WordAndNum> list = new ArrayList<WordAndNum>();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            String line = null;
            while ((line = in.readLine()) != null) {
                countWords(line, list);
            }
        } catch (IOException e) {
            // As before, a read failure is reported and nothing is printed.
            e.printStackTrace();
            return;
        }
        bubbleSort(list);
        for (int i = 0; i < list.size(); i++) {
            System.out.println(list.get(i).word + "===" + list.get(i).num);
        }
    }

    /** Splits one line into words (maximal runs of non-delimiter characters) and tallies each one. */
    private static void countWords(String line, ArrayList<WordAndNum> list) {
        int start = -1; // index where the current word began, or -1 while between words
        // The loop runs one step past the last character so a word ending the line is not lost.
        for (int i = 0; i <= line.length(); i++) {
            boolean delimiter = i == line.length() || DELIMITERS.indexOf(line.charAt(i)) >= 0;
            if (!delimiter) {
                if (start < 0) {
                    start = i;
                }
            } else if (start >= 0) {
                // Only non-empty runs reach this point, so ", " or a blank line adds nothing.
                addWord(list, line.substring(start, i));
                start = -1;
            }
        }
    }

    /** Increments the count of {@code word} if it is already listed, otherwise appends it with count 1. */
    private static void addWord(ArrayList<WordAndNum> list, String word) {
        for (int j = 0; j < list.size(); j++) {
            WordAndNum entry = list.get(j);
            if (entry.word.equals(word)) {
                entry.num++;
                return;
            }
        }
        WordAndNum entry = new WordAndNum();
        entry.word = word;
        entry.num = 1;
        list.add(entry);
    }

    /** Bubble-sorts the list in place by descending count; equal counts keep their relative order. */
    private static void bubbleSort(ArrayList<WordAndNum> list) {
        for (int i = 0; i < list.size(); i++) {
            for (int j = 0; j < list.size() - i - 1; j++) {
                if (list.get(j).num < list.get(j + 1).num) {
                    WordAndNum wd1 = list.get(j);
                    WordAndNum wd2 = list.get(j + 1);
                    list.set(j, wd2);
                    list.set(j + 1, wd1);
                }
            }
        }
    }
}
4.以如下英文文章为例,输出结果为
5.结果分析:可以对英文文章进行词频统计并按从大到小进行输出