1. Beginning
We've learnt how to read a file from our own computer and tried to build a new word cloud from it.
#1.读取文件内容
import jieba
import wordcloud
from imageio import imread
# Build a mask-shaped word cloud from a Chinese text file.
#
# NOTE(review): the text-file path and the mask-image path below look like
# they have lost their backslash separators -- confirm the real locations
# before running; they are left exactly as found.

# 1. Read the text the word cloud is built from.
with open(r'C:UsersJinyyDesktop新建文件夹第十二期上课笔记day51.txt', 'r', encoding='utf8') as f:
    data = f.read()

# 2. Segment the text into words with jieba.
jieba.add_word('诺森德的守望')  # register this phrase so it is kept as one token
# jieba.del_word('先生')  # example: remove a word from jieba's dictionary
data_list = jieba.lcut(data)
# The tokens must be separated by a space: WordCloud finds words by splitting
# on non-word characters, so joining with '' would glue the segments back
# into the original unsegmented text and waste the jieba step.
data = ' '.join(data_list)
print(data)

# 3. Load the picture used as the mask (the shape of the cloud).
img = imread(r'C:UsersJinyyDesktop新建文件夹第十二期上课笔记day4fill.png')

# 4. Generate the word cloud and save it as an image.
# A font with CJK glyphs is required, otherwise Chinese words render as boxes.
w = wordcloud.WordCloud(
    font_path=r'C:\Windows\Fonts\simsun.ttc',
    background_color='white',
    mask=img,
)
w.generate(data)
w.to_file('诺森德的守望.png')
2. Then
Nick has left, and Mr. Tank is coming to show us how to use a web crawler to grab files from the internet. That is a really cool trick: now I can download porn videos for free!
'''
requests请求库
pip3 install requests
'''
import requests
# Download one picture and one video with requests and save them to disk.

# 1. Send a GET request for the picture.
# The timeout keeps the script from hanging forever on a stalled server.
response = requests.get(
    url='http://pic16_2.qiyeku.com/qiyeku_pic/2016/6/27/zsliying/product/product_pic/image/2016_07_11/20160711085044523.jpg',
    timeout=30,
)
# Stop on a 4xx/5xx answer instead of saving an error page as a .jpg and
# then reporting success.
response.raise_for_status()

# 2. Save the data. response.content is the body as raw bytes, so the file
# is opened in binary mode; binary data needs no character encoding.
with open('porn1.jpg', 'wb') as f:
    f.write(response.content)
print('图片下载成功!')

# 3. Same steps for the video.
response2 = requests.get(
    url='https://media.st.dl.bscstorage.net/steam/apps/256738414/movie_max.webm',
    timeout=30,
)
response2.raise_for_status()

# Write the binary stream to a local file.
with open('1.webm', 'wb') as f2:
    f2.write(response2.content)
print('视频下载成功')
import requests
import re
# Fetch a novel's index page and pull out the contents of its chapter list.

# 1. Send the request.
response = requests.get(
    url='http://www.xbiquge.la/15/15428/',
    timeout=30,  # do not hang forever on a stalled server
)
# Stop on a 4xx/5xx answer rather than parsing an error page.
response.raise_for_status()
# Decode the body as UTF-8 before reading response.text.
response.encoding = 'utf-8'
print(response.text)

# NOTE(review): the checklist below looks like a leftover from a
# movie-listing example and does not match this page -- confirm and remove:
#   1. movie title
#   2. link to the movie's detail page
#   3. rating
#   4. number of ratings

# 2. Parse the page and extract the data.
# re.findall(pattern, text, flags) returns every non-overlapping match; with
# one capture group it returns the text of that group.
# re.S (DOTALL) lets '.' match newlines too, so a match can span lines.
# '.*?' is the NON-greedy form: it matches as little as possible.
# The text argument must be the variable response.text, not the string
# literal 'response.text' (which can never match and always yields []).
# The name `moviename` is kept as-is even though it holds chapter-list HTML.
moviename = re.findall(r'<div id="list">(.*?)</div>', response.text, re.S)
print(moviename)
The code at the top grabs a picture and a video; the code at the bottom grabs text from a web page.
This won't be the last time we meet.