# Word-frequency report: print the most common words found in tt.txt,
# ignoring a small set of grammatical (function) words.
from collections import Counter

# 1. Input: the text file to analyse.
TEXT_PATH = 'tt.txt'
# Characters treated as word separators (each is replaced by a space).
SEPARATORS = ',.-"'
# 4. Grammatical words that are left out of the ranking.
STOP_WORDS = frozenset(
    {'the', 'a', 'to', 'of', 'and', 'in', 'that', 'on', 'this'}
)
# 6. How many entries the report prints at most.
TOP_N = 20


def split_words(text):
    """Return the lower-cased words of *text*.

    Every character in SEPARATORS is replaced by a space before the text
    is split on whitespace, so ``"well-known"`` yields two words.
    """
    # 2. Break the text into words.
    table = str.maketrans(SEPARATORS, ' ' * len(SEPARATORS))
    return text.lower().translate(table).split()


def top_words(text, limit=TOP_N, exclude=STOP_WORDS):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Words contained in *exclude* are not counted. Fewer than *limit*
    pairs are returned when the text has fewer distinct words, and words
    with equal counts keep the order of their first occurrence.
    """
    # 3./4. Count every word in a single pass, skipping excluded words.
    # Filtering here never fails for an excluded word that is absent from
    # the text (unlike calling set.remove() for each excluded word).
    counts = Counter(
        word for word in split_words(text) if word not in exclude
    )
    # 5. Sort by count, descending, and keep only the leading entries.
    return counts.most_common(limit)


def main():
    """Read TEXT_PATH and print its top words, one tuple per line."""
    # 1. Read the string to analyse; `with` closes the file even on error.
    with open(TEXT_PATH, 'r') as fo:
        news = fo.read()
    # 6. Print the TOP_N entries (or fewer, if the text is short).
    for entry in top_words(news):
        print(entry)


if __name__ == '__main__':
    main()