文本的操作
函数的排序操作:
def func(i):
    """Return the third field of record ``i``; used as the sort key below."""
    return i[2]


# Named ``heroes`` instead of ``list`` so the builtin ``list`` is not shadowed
# for the rest of the module.
heroes = [('曹操', 101, 'c'), ('吕布', 100, 'd'), ('刘备', 200, 'l'), ('大乔', 50, 'x')]
heroes.sort(key=func)
# A hand-written sorting algorithm would still have to pull this value out of
# every element once, which is exactly what the key function does.
print(heroes)
文本的读写操作:
# Write data to the file; mode 'w' overwrites any existing content.
# NOTE(review): the path literal contains no separators -- confirm the intended location.
# The with-statement closes the file even if write() raises.
with open(r"E:实习编程 1day061.txt", 'w') as f:
    data = f.write('xxxxxxxxxxxx')  # write() returns the number of characters written
# Read the whole text file back and print its content.
# The original ended with a bare ``f.close`` (no call), so the file was never
# closed; the with-statement closes it reliably.
# NOTE(review): the path literal contains no separators -- confirm the intended location.
with open(r"E:实习编程 1day061.txt", 'r') as f:
    data = f.read()
print(data)
# Read an image file in binary mode ('rb') and print the raw bytes.
# The original ended with a bare ``f.close`` (no call), so the file was never
# closed; the with-statement closes it reliably.
# NOTE(review): the path literal contains no separators -- confirm the intended location.
with open(r"E:实习编程 1day062.jpg", 'rb') as f:
    data = f.read()
print(data)
词频统计:
英文:
# Count word frequencies in an English text and print the ten most frequent.
# NOTE(review): the path literal contains no separators -- confirm the intended location.
with open(r'E:实习编程 1day0622.txt', 'r') as f:  # closed automatically on exit
    data = f.read().lower()  # lower-case so that counting ignores case

# split() without an argument splits on any run of whitespace, so words on
# different lines are separated and no empty-string tokens are counted.
data_split = data.split()

count_dict = {}  # word -> number of occurrences
for word in data_split:
    # First sighting starts at 1, later sightings add one.
    count_dict[word] = count_dict.get(word, 0) + 1


def func(i):
    """Return the count from a ``(word, count)`` pair; used as the sort key."""
    return i[1]


# Ascending sort followed by reverse() gives highest counts first.
lt = sorted(count_dict.items(), key=func)
lt.reverse()
for i in lt[0:10]:
    # ``:^5`` centres the count in 5 columns (the original ``^5`` XOR-ed it with 5).
    print(f'{i[0]:^7}{i[1]:^5}')
中文:
import jieba
# Count word frequencies in a Chinese text segmented by jieba and print the
# ten most frequent words.
# NOTE(review): the path literal contains no separators -- confirm the intended location.
with open(r'E:实习编程 1day06 hreekingdoms.txt', 'r', encoding='utf8') as f:
    data = f.read()  # file is closed automatically on exit
data_jieba = jieba.lcut(data)

# Words excluded from the count; built once instead of inside the loop.
stop_words = {"将军","却是","荆州","二人","不可","不幸","却说","不能","如此","商议","如何","追赶","二十余","听令","不计其数","欣然","大汉","丞相","主公","军士","左右","军马","不如","赶来","引兵","次曰","荆州","大喜","朝廷","当先","传令","次日","天下","东吴","于是","今日","不敢","魏兵","陛下"}

count_dict = {}  # word -> number of occurrences
for word in data_jieba:
    # Skip single-character tokens and excluded words.
    if len(word) == 1 or word in stop_words:
        continue
    # Drop '曰' from the token and count '云长' under '关公';
    # replace() is a no-op when the substring is absent.
    word = word.replace('曰', '').replace('云长', '关公')
    count_dict[word] = count_dict.get(word, 0) + 1


def func(i):
    """Return the count from a ``(word, count)`` pair; used as the sort key."""
    return i[1]


# Ascending sort followed by reverse() gives highest counts first.
data_list = sorted(count_dict.items(), key=func)
data_list.reverse()
for i in data_list[0:10]:
    # ``:^5`` centres the count in 5 columns (the original ``^5`` XOR-ed it with 5).
    print(f'{i[0]:^7}{i[1]:^5}')
词云:
import wordcloud
from imageio import imread
# Build a word cloud from the text file, passing the loaded image as the mask,
# and save the result to 'outfile.png'.
# NOTE(review): the path literals contain no separators (and the font path has
# no extension) -- confirm the intended locations.
mask = imread(r'E:实习编程 1day061.png')
with open(r'E:实习编程 1day0622.txt', 'r', encoding='utf8') as f:
    data = f.read()  # file is closed automatically on exit
w = wordcloud.WordCloud(
    font_path=r'C:WindowsFontssimfang',
    mask=mask,
    width=500,
    height=600,
    background_color="white",
)
w.generate(data)
w.to_file('outfile.png')