"""Find the characters mentioned most often in Romance of the Three Kingdoms.

The script reads ``三国演义.txt``, segments it with jieba, merges the aliases
of the main characters, drops frequent words that are not names, then prints
the ten most frequent entries and writes them to ``result.txt``.
"""

# Frequent multi-character words that are not character names.
excludes = {
    "将军", "却说", "荆州", "二人", "不可", "不能", "如此", "商议", "如何", "主公",
    "军士", "左右", "军马", "引兵", "次日", "大喜", "天下", "东吴", "于是", "今日",
    "不敢", "魏兵", "人马", "陛下", "一人", "不知", "汉中", "只见", "众将", "蜀兵",
    "丞相",
}

# Alternative names (and "<name>曰" tokens) mapped to one canonical name, so
# that every mention of the same person is added to a single counter.
ALIASES = {
    "孔明曰": "孔明",
    "诸葛亮": "孔明",
    "玄德曰": "刘备",
    "玄德": "刘备",
    "云长": "关羽",
    "关公": "关羽",
}


def count_names(words, excluded=None):
    """Count word occurrences, merging aliases and dropping excluded words.

    Args:
        words: Iterable of segmented tokens.
        excluded: Words to remove from the result; defaults to the
            module-level ``excludes`` set.

    Returns:
        A dict mapping each remaining word to its number of occurrences, in
        first-seen order.
    """
    if excluded is None:
        excluded = excludes

    counts = {}
    for word in words:
        # Single-character tokens are punctuation or lone characters.
        if len(word) == 1:
            continue
        name = ALIASES.get(word, word)
        counts[name] = counts.get(name, 0) + 1

    for word in excluded:
        # pop() with a default: an excluded word may never occur in the text,
        # in which case a plain ``del`` would raise KeyError.
        counts.pop(word, None)
    return counts


def top_names(counts, limit=10):
    """Return up to ``limit`` ``(word, count)`` pairs, most frequent first.

    Ties keep the order in which the words were first counted. Fewer than
    ``limit`` pairs are returned when ``counts`` holds fewer entries.
    """
    ranking = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranking[:limit]


def main():
    """Run the analysis: read the novel, count names, print and save the top ten."""
    # Imported here rather than at module level so that the counting helpers
    # above can be imported without the third-party jieba package.
    import jieba

    # Step 1: read the novel.
    with open('三国演义.txt', 'r', encoding='utf-8') as novel:
        content = novel.read()

    print('--------------------------------------------------------------------')

    # Step 2: segment the text into words (public jieba API).
    words = jieba.lcut(content)

    # Step 3: count, filter and rank.
    ranking = top_names(count_names(words), 10)

    # Step 4: report to the console and to result.txt.
    with open('result.txt', 'w', encoding='utf-8') as out:
        for name, count in ranking:
            print('{:<10}{:>5}'.format(name, count))
            # NOTE(review): entries are separated by a single space, not a
            # newline, exactly as in the original script - confirm intended.
            out.write('{:<10}{:>5}{}'.format(name, count, ' '))


if __name__ == "__main__":
    main()
运行结果: