"""Word-frequency report for a text file.

Reads a text file (``a.txt`` by default), replaces punctuation and digits
with spaces, folds case, counts how often each word occurs and prints:

1. the separator characters, the cleaned text and its lower-cased form,
2. the word list and the raw counts (in first-occurrence order),
3. the counts of the words that are not in ``STOP_WORDS``,
4. all counts sorted by descending frequency, and
5. the ``TOP_N`` most frequent words.
"""
import operator
import string
from collections import Counter

# Every character in this string is treated as a word separator.
SEPARATORS = string.punctuation + string.digits
# Translation table mapping each separator to a single space.
_TO_SPACES = str.maketrans(SEPARATORS, " " * len(SEPARATORS))

# Prepositions, articles, etc. left out of the "content words" listing.
STOP_WORDS = frozenset({'to', 'for', 'and', 'of', 'is', 'a', 'an', 'the'})

# Number of entries shown in the final ranking.
TOP_N = 20


def clean_text(text):
    """Return *text* with every punctuation mark and digit replaced by a space.

    Case is left untouched; the length of the string does not change.
    """
    # One pass over the text instead of one ``replace`` call per separator.
    return text.translate(_TO_SPACES)


def count_words(words):
    """Return a ``Counter`` mapping each word in *words* to its frequency.

    Keys keep the order in which the words first appear.
    """
    return Counter(words)


def rank_words(counts):
    """Return ``(word, count)`` pairs sorted by descending count.

    The sort is stable, so words with equal counts keep the order they have
    in *counts*.
    """
    return sorted(counts.items(), key=operator.itemgetter(1), reverse=True)


def main(path="a.txt"):
    """Print the word-frequency report for the file at *path*.

    Raises:
        OSError: if *path* cannot be opened or read.
    """
    # ``with`` guarantees the file is closed even if reading fails.
    with open(path, "r") as handle:
        raw = handle.read()

    cleaned = clean_text(raw)
    print(SEPARATORS)
    print(cleaned)
    print()

    # Fold case *before* counting so that "The" and "the" are one word and
    # the lower-case STOP_WORDS entries actually match.
    lowered = cleaned.lower()
    print(lowered)

    words = lowered.split()
    print(words)

    counts = count_words(words)
    for word, freq in counts.items():
        print(word, ':', freq)
    print()
    print()
    print()

    # Drop prepositions, articles, etc.
    content_words = set(counts) - STOP_WORDS
    print(content_words)
    for word in content_words:
        print(word, ":", counts[word])

    # Sort by frequency, most frequent first.
    # NOTE(review): the ranking below is built from all words, stop words
    # included, as in the original script -- confirm whether the filtered
    # set was meant to be ranked instead.
    ranked = rank_words(counts)
    print(ranked)
    for word, freq in ranked:
        print(word, ':', freq)

    # Print the TOP_N most frequent words.
    print("词频最大TOP20:")
    for word, freq in ranked[:TOP_N]:
        print(word, ':', freq)


if __name__ == "__main__":
    main()