题目:单词统计
第1步:输出某个英文文本文件中 26 字母出现的频率,由高到低排列,并显示字母出现的百分比,精确到小数点后面两位。
第2步:输出单个文件中的前 N 个最常出现的英语单词。作用:一个用于统计文本文件中的英语单词出现频率。
设计思想:首先是统计字母,我们应该先把要统计的文件读取,遍历统计字母出现的次数,将大写字母转换为小写字母;统计单词也需要将大写字母转换为小写,只要遇到空格则记为一个单词,遍历一遍统计单词个数。
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Scanner;
import java.awt.List;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.text.NumberFormat;
public class tongji {
public static void main(String[] args) {
File src =new File("c:/Harry Potter and the Sorcerer's Stone.txt");
InputStream is=null;
try {
is=new FileInputStream(src);
int temp;
int[] p=new int[56];
while((temp=is.read())!=-1)//当数据为不存在时,返回-1
{
char t=(char)temp;
if(t=='a'||t=='A')
{
p[0]++;
}
if(t=='b'||t=='B')
{
p[1]++;
}
if(t=='c'||t=='C')
{
p[2]++;
}
if(t=='d'||t=='D')
{
p[3]++;
}
if(t=='e'||t=='E')
{
p[4]++;
}
if(t=='f'||t=='F')
{
p[5]++;
}
if(t=='g'||t=='G')
{
p[6]++;
}
if(t=='h'||t=='H')
{
p[7]++;
}
if(t=='i'||t=='I')
{
p[8]++;
}
if(t=='j'||t=='J')
{
p[9]++;
}
if(t=='k'||t=='K')
{
p[10]++;
}
if(t=='l'||t=='L')
{
p[11]++;
}
if(t=='m'||t=='M')
{
p[12]++;
}
if(t=='n'||t=='N')
{
p[13]++;
}
if(t=='o'||t=='O')
{
p[14]++;
}
if(t=='P'||t=='p')
{
p[15]++;
}
if(t=='q'||t=='Q')
{
p[16]++;
}
if(t=='r'||t=='R')
{
p[17]++;
}
if(t=='S'||t=='s')
{
p[18]++;
}
if(t=='t'||t=='T')
{
p[19]++;
}
if(t=='u'||t=='U')
{
p[20]++;
}
if(t=='v'||t=='V')
{
p[21]++;
}
if(t=='w'||t=='W')
{
p[22]++;
}
if(t=='X'||t=='x')
{
p[23]++;
}
if(t=='Y'||t=='y')
{
p[24]++;
}
if(t=='z'||t=='Z')
{
p[25]++;
}
}
int[] y=new int[26];
for(int r=0;r<26;r++)
{
y[r]=p[r];
}
int templ=0;
for(int i=0;i<26;i++)
{
templ+=p[i];
}
float qq=(float)templ;
int te;
//冒泡排序
for(int g=0;g<24;g++)
{
for(int f=0;f<24-g;f++)
{
if(p[f]<p[f+1])
{
te=p[f];
p[f]=p[f+1];
p[f+1]=te;
}
}}
for(int j=0;j<26;j++) {
NumberFormat nt = NumberFormat.getPercentInstance();//获取百分数实例
nt.setMinimumFractionDigits(2);//保留百分数后两位
char w=' ';
for(int b=0;b<26;b++) {
if(p[j]==y[b]) {
switch (b) {
case 0:
w='a';
break;
case 1:
w='b';
break;
case 2:
w='c';
break;
case 3:
w='d';
break;
case 4:
w='e';
break;
case 5:
w='f';
break;
case 6:
w='g';
break;
case 7:
w='h';
break;
case 8:
w='i';
break;
case 9:
w='j';
break;
case 10:
w='k';
break;
case 11:
w='l';
break;
case 12:
w='m';
break;
case 13:
w='n';
break;
case 14:
w='o';
break;
case 15:
w='p';
break;
case 16:
w='q';
break;
case 17:
w='r';
break;
case 18:
w='s';
break;
case 19:
w='t';
break;
case 20:
w='u';
break;
case 21:
w='v';
break;
case 22:
w='w';
break;
case 23:
w='x';
break;
case 24:
w='y';
break;
case 25:
w='z';
break;
default:
break;
}
}
}
float q=(float)p[j];
System.out.println(w+"---"+nt.format(q/qq));
//System.out.println(p[j]/templ);
}
//System.out.println(templ);
//System.out.println(p[0]);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}finally {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}} }}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.NoSuchFileException;
import java.util.*;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class danci{
public static void main(String[] args) throws IOException{
ArrayList<String> AL = new ArrayList<String>();
try {
FileInputStream IS = new FileInputStream("c:/Harry Potter and the Sorcerer's Stone.txt");
Scanner S = new Scanner(IS);
while(S.hasNextLine()){
StringTokenizer st = new StringTokenizer(StringFunc(S.nextLine()));
while(st.hasMoreTokens()) {
AL.add(st.nextToken());
}
}
IS.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
HashFunc(AL);
}
//handle the string
public static String StringFunc(String Str) {
Str = Str.toLowerCase();
Str = Pattern.compile("[^A-Za-z]+").matcher(Str).replaceAll(" ");
return Str;
}
//put elements in a hashtable and count how many times they appear
public static void HashFunc(ArrayList<String> AL) {
HashMap<String, Integer> Hmap = new LinkedHashMap<>();
Collections.sort(AL);
for (String temp : AL) {
Integer count = Hmap.get(temp);
Hmap.put(temp, (count == null) ? 1 : count + 1);
}
Iterator iter = Hmap.entrySet().iterator();
while (iter.hasNext()) {
Map.Entry entry = (Map.Entry) iter.next();
Object key = entry.getKey();
Object val = entry.getValue();
System.out.println(val + " " + key);
}
}
}