# -*- coding: utf-8 -*-
# @Time    : 2022/5/28 17:30
# @Author  : huaobin
# @File    : doupo.py
# @Software: PyCharm
"""Scrape chapters of a novel from qbiqu.com.

Each chapter page is fetched, its title and body text extracted, and the
text appended to ``./斗破苍穹/<chapter title>.txt`` (one file per chapter).
"""
import requests
import re
import time
from bs4 import BeautifulSoup
import os, sys

count = 0
spath = "./斗破苍穹/doupo.txt"


def get_info(url):
    """Fetch one chapter page at *url* and append its title and body text
    to ``./斗破苍穹/<chapter title>.txt``.

    Pages that return a non-200 status or lack a ``div.bookname h1`` title
    element (e.g. error/landing pages) are skipped silently.
    """
    print(url)
    # timeout so a dead/slow server cannot hang the whole crawl
    res = requests.get(url, timeout=10)
    # GBK is a superset of GB2312; the site declares GB2312 but pages may
    # contain GBK-only characters, which GB2312 decoding would mangle
    res.encoding = 'GBK'
    if res.status_code != 200:
        return
    soup = BeautifulSoup(res.text, 'html.parser')
    content = soup.select("div#content")        # chapter body text
    title = soup.select("div.bookname h1")      # chapter title
    if not title:
        return  # not a chapter page
    print(title[0].text)
    # ensure the output directory exists before opening the file
    os.makedirs("./斗破苍穹", exist_ok=True)
    path = "./斗破苍穹/" + title[0].text + ".txt"
    # with-block guarantees the file is closed even if a write fails
    with open(path, 'a+', encoding='utf-8') as f:
        f.write(title[0].text + '\n')
        f.write('\n')
        for con in content:
            f.write(con.text)


if __name__ == '__main__':
    # Chapter page ids; the last page of the book is 10317000.
    urls = ['https://www.qbiqu.com/18_18902/{}.html'.format(str(i))
            for i in range(10316495, 10316500)]
    for url in urls:
        get_info(url)
        time.sleep(1)  # be polite: at most one request per second