要求:
对文件中的单词进行统计,不区分大小写,并显示出现次数最多的十个单词
思路:
利用字典key,value的特性存单词及其重复的次数
对每行进行特殊字符处理,分离出被特殊字符分隔的单词
from collections import Counter

# Punctuation treated as a word separator (same characters as the original
# set; the duplicated "*" is dropped because it had no effect).
_SPECIAL_CHARS = "!`#.,-*()/[]"

# Built once at import time: maps every special character to a space so a
# single str.translate() pass replaces the per-character loop.
_SPECIAL_TO_SPACE = str.maketrans(_SPECIAL_CHARS, " " * len(_SPECIAL_CHARS))

# Path of the text file whose words are counted when run as a script.
src = '/tmp/sample.txt'


def makekey(s: str) -> list:
    """Split *s* into words, treating special characters as separators.

    Every character in ``_SPECIAL_CHARS`` is replaced by a space, then the
    result is split on runs of whitespace.

    Args:
        s: The text to tokenize (a single word or a whole line).

    Returns:
        The list of non-empty words in their original order; an empty list
        when *s* contains only separators or is empty.
    """
    return s.translate(_SPECIAL_TO_SPACE).split()


def count_words(lines) -> Counter:
    """Count case-insensitive word occurrences across *lines*.

    Args:
        lines: Any iterable of strings, e.g. an open text file.

    Returns:
        A ``Counter`` mapping each lower-cased word to its number of
        occurrences, with keys in first-seen order.
    """
    counts = Counter()
    for line in lines:
        # makekey() already splits on whitespace, so a separate
        # line.split() pass beforehand is unnecessary.
        counts.update(makekey(line.lower()))
    return counts


def main() -> None:
    """Print the ten most frequent words of ``src`` as (word, count) pairs."""
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(src, 'r', encoding='utf-8') as f:
        counts = count_words(f)
    # most_common(10) sorts by count, descending; ties keep first-seen
    # order, matching a stable sorted(..., reverse=True)[:10].
    print(counts.most_common(10))


if __name__ == "__main__":
    main()