英文词频统计
"""English word-frequency count.

Splits a text into lowercase words, drops a small set of excluded words,
and prints the 20 most frequent remaining words with their counts.
"""
from collections import Counter

# Punctuation characters deleted from the text before it is split into words.
SEP = ''',?'''

# Words left out of the ranking. The original set literal was missing a
# comma ('the''anymore'), which silently merged two entries into the single
# string 'theanymore', so neither word was actually excluded.
EXCLUDE = frozenset({'i', 'in', 'the', 'anymore'})


def split_words(text, sep=SEP):
    """Return the whitespace-separated words of *text*.

    Every character listed in *sep* is removed from *text* first.
    """
    return text.translate(str.maketrans('', '', sep)).split()


def rank_words(words, exclude=EXCLUDE):
    """Return ``(word, count)`` pairs sorted from most to least frequent.

    Words found in *exclude* are ignored. Ties are broken alphabetically so
    the result is deterministic.
    """
    counts = Counter(word for word in words if word not in exclude)
    return sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))


def main_english():
    """Print the contents of hello.txt, then the word statistics."""
    # NOTE(review): the statistics are computed on this literal, not on the
    # contents of hello.txt. The '...' looks like a placeholder for the real
    # text -- confirm which source is intended.
    strHello = '''...'''.lower()

    # Context manager closes the file even if reading fails.
    with open('hello.txt', 'r', encoding='utf-8') as fo:
        hello = fo.read()
    print(hello)

    strList = split_words(strHello)
    print(len(strList), strList)

    strSet = set(strList) - EXCLUDE
    print(len(strSet), strSet)

    # Sorted by count (descending) so the slice below really is the top 20;
    # a plain list.sort() would order the pairs alphabetically by word.
    wcList = rank_words(strList)
    strDict = dict(wcList)
    print(strDict.items())
    print(wcList[:20])


if __name__ == '__main__':
    main_english()
运行结果
中文字频统计(小说《装在套子里的人》)
"""Chinese word-frequency count for the text stored in taozi.txt.

Reads the file, removes punctuation, segments the text with jieba and
prints the 20 most frequent tokens with their counts.
"""
from collections import Counter

# Punctuation characters deleted from the text before segmentation.
# NOTE(review): only these exact characters are removed; full-width forms
# such as ',' or '?' are not in the list -- confirm whether the input
# text uses them.
PUNCTUATION = ',。?!;:“”‘’-——<_/>'


def strip_punctuation(text, chars=PUNCTUATION):
    """Return *text* with every character listed in *chars* removed."""
    return text.translate(str.maketrans('', '', chars))


def top_tokens(tokens, limit=20):
    """Return up to *limit* ``(token, count)`` pairs, most frequent first.

    Whitespace-only tokens (spaces, newlines) are skipped so they do not
    show up as "words". Ties are broken by token so the result is
    deterministic. Fewer than *limit* pairs are returned when the input has
    fewer distinct tokens, instead of raising IndexError.
    """
    counts = Counter(token for token in tokens if token.strip())
    ranked = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
    return ranked[:limit]


def main_chinese():
    """Print taozi.txt, then its 20 most frequent jieba tokens."""
    # Imported here so the pure helpers above can be used without jieba.
    import jieba

    # Context manager closes the file even if reading fails.
    with open('taozi.txt', 'r', encoding='utf-8') as fo:
        zhuang = fo.read().lower()
    print(zhuang)

    zhuang = strip_punctuation(zhuang)

    # The original stored the segmentation under a misspelled name
    # ('zhaung') and then counted single characters of the raw string;
    # the segmented tokens are what must be counted.
    words = jieba.cut_for_search(zhuang)

    for pair in top_tokens(words):
        print(pair)


if __name__ == '__main__':
    main_chinese()
运行结果