# coding:utf-8
import os

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


class downloader():

    def __init__(self):
        self.urls = []   # chapter links
        self.name = []   # chapter titles
        self.url = 'https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqugex.com&q='

    def Get_url(self):
        """Prompt for the novel title, search for it, and return the book page URL."""
        # Configure Chrome to run headless (no visible browser window).
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        browser = webdriver.Chrome(options=chrome_options)
        browser.get(self.url)
        c = input('Enter the full novel title: ')
        # Type the title into the search box and submit the form.
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[3]').send_keys(c)
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[4]').click()
        new_url = browser.current_url
        # Shut down the browser and the chromedriver process.
        browser.quit()
        print('Browser closed')
        response = requests.get(new_url)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        # The first result's <span class="s2"> wraps the link to the book page.
        new_name = soup.find('span', class_='s2').find('a')
        self.href = new_name.attrs['href']
        print(self.href)
        return self.href

    def Response(self):
        """Fetch the book page and collect every chapter title and link."""
        response = requests.get(self.href)
        response.encoding = 'gbk'  # the site serves GBK; this avoids mojibake
        self.soup = BeautifulSoup(response.text, 'lxml')  # parse the page
        # The chapter list lives under <div class="listmain">.
        div = self.soup.find('div', class_='listmain')
        for i in div.find_all('a'):  # every <a> inside listmain is a chapter
            self.name.append(i.string)  # the tag's text is the chapter title
            self.urls.append('https://www.biqugex.com%s' % i.get('href'))  # chapter link

    def file(self):
        """Read the novel title and create a folder with the same name."""
        h2 = self.soup.select_one('body > div.book > div.info > h2')
        b = h2.string
        # Raw string: a plain '\U...' would be an invalid unicode escape in Python 3.
        c = r'C:\Users\Administrator\Desktop\%s' % b
        if not os.path.exists(c):
            os.mkdir(c)

        # Walk through urls and download the body text of each chapter.
        i = 0
        while i < len(self.urls):
            response1 = requests.get(url=self.urls[i])
            response1.encoding = 'gbk'
            soup2 = BeautifulSoup(response1.text, 'lxml')
            d = soup2.find('div', id='content')  # chapter body
            # Build the output file name from the chapter title.
            src = self.name[i] + '.txt'
            filename = c + '/' + src
            print(filename)

            # Write the extracted chapter text to its own file.
            with open(filename, 'w+', encoding='utf-8') as f:
                f.write(d.text)
            i += 1

    def Main(self):
        """If the search fails, report that the novel was not found."""
        try:
            d = downloader()
            d.Get_url()
        except Exception:
            print('Not found')
        else:
            d.Response()
            d.file()


if __name__ == '__main__':
    # Example book page: https://www.biqugex.com/book_104027/
    a = downloader()
    a.Main()
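# ---------------------------------------------------------------------------
# Usage notes (my assumptions, not part of the original script):
# - Dependencies: pip install requests beautifulsoup4 lxml selenium
# - Needs a ChromeDriver compatible with the installed Chrome; it must be on
#   PATH, though Selenium 4.6+ can also download a driver automatically.
# - Run the script, enter the full novel title at the prompt, and each chapter
#   is saved as its own .txt file in a Desktop folder named after the novel.
# - The hard-coded C:\Users\Administrator\Desktop path comes from the original
#   code; adjust it (e.g. via os.path.expanduser('~')) for other machines.
# ---------------------------------------------------------------------------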