# 下载文件 (download a file)
# coding: UTF-8
#20190106
#源文件 写入 批量下载加个for循环
##单线程下载
import requests
# Source address: the archive name is appended to the download directory URL.
str1 = "sogoupinyinzd423.rar"
url = r"http://down.sd173.com/soft1/" + str1
# Destination: the download is saved under the same name as the archive (str1).
path = str1
r = requests.get(url)
# Fail loudly on an HTTP error instead of saving an error page as the archive.
r.raise_for_status()
# The with-block closes the file on exit, so no explicit close() is needed.
with open(path, "wb") as f:
    f.write(r.content)
# Report completion only once the bytes have actually been written.
print("下载完成")
# 下载小说 (download a novel)
#http://www.coblogs.com/Eva-J/articles/7228075.html#_label10
import requests
import re
import json
def getPage(url):
    """Issue an HTTP GET for ``url`` and return the response body as text."""
    return requests.get(url).text
def parsePage(s):
    """Lazily extract title/content records from the HTML text ``s``.

    A record is recognised as a ``<font color="#000">`` element (captured as
    the title) followed, after any amount of intervening markup, by a
    single-cell table row ``<tr><td>...</td></tr>`` (captured as the content).
    ``re.S`` lets both captures span multiple lines.

    Yields:
        dict: ``{"内容": <content>, "题目": <title>}`` for every match, in
        document order. Nothing is yielded when the text has no match.
    """
    pattern = '<font color="#000">(?P<id>.*?)</font>.*?<tr><td>(?P<kk>.*?)</td></tr>'
    for match in re.finditer(pattern, s, re.S):
        title = match.group("id")
        content = match.group("kk")
        # Key order (content first, then title) is kept so serialised output
        # looks the same as before.
        yield {"内容": content, "题目": title}
def main():
    """Fetch each listed page, extract its records and append them to 7.txt.

    Every record yielded by parsePage is written as one JSON object per line,
    UTF-8 encoded with non-ASCII characters kept verbatim.
    """
    page_ids = [595]
    for page_id in page_ids:
        print(page_id)
        # NOTE(review): this URL has no scheme or host, so requests.get() will
        # reject it; the site prefix appears to be missing from this file and
        # has to be restored before the script can run.
        url = "/538" + str(page_id) + ".html"
        response_html = requests.get(url)
        # Force UTF-8 decoding rather than relying on the detected encoding.
        response_html.encoding = 'utf8'
        records = parsePage(response_html.text)
        # The context manager guarantees the output file is flushed and closed
        # even if writing a record fails.
        with open("7.txt", "a", encoding="utf8") as f:
            for obj in records:
                print("ok" + str(page_id))
                # Strip markup and line breaks from the raw values *before*
                # serialising: json.dumps escapes CR/LF, so removing '\r\n'
                # from the JSON text afterwards could never match.
                cleaned = {
                    key: value.replace('<br />', '').replace('\r\n', '')
                    for key, value in obj.items()
                }
                data = json.dumps(cleaned, ensure_ascii=False)
                f.write(data + "\n")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()