python 爬起点目录

 1 #目标：书名，简介，作者，字数
 2 #首先确定源代码的列表
 3 import urllib.request
 4 import re
 5 from bs4 import BeautifulSoup
 6 import random
 7 import time
 8 
 9 load=input("路径:")  # output file path; geturl passes it to getbook, which opens it in append mode
10 num=input("输入页数：")  # number of list pages to crawl, read as a string; geturl converts it with int()
11 
12 
13 
14 
def gethtml(url):
    """Download *url* and return its content parsed with BeautifulSoup.

    The response body is decoded as UTF-8 and parsed with the built-in
    'html.parser' backend.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup object built from the page's HTML.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response even when read() or
    # decode() raises, so the connection is not leaked.
    with urllib.request.urlopen(url) as page:
        html = page.read().decode('utf-8')  # html is a str holding the whole page
    return BeautifulSoup(html, 'html.parser')
20 
def getbook(soup,load):
    """Append the title, author, intro and word count of each listed book to a file.

    Looks up the ``<li>`` elements whose ``data-rid`` attribute is "1" to
    "20", extracts the fields from their HTML with regular expressions, and
    appends one record per book to *load*: the four fields separated by
    newlines, followed by a dashed separator line.

    Entries that are absent from the page (for example a page with fewer
    than 20 books) or that lack one of the fields are skipped instead of
    raising IndexError.

    Args:
        soup: Parsed page; only its ``find_all(name, attrs)`` method is used.
        load: Path of the output file. It is opened in append mode and
            written as UTF-8 so that Chinese text does not depend on the
            platform's default encoding.
    """
    # Compile the patterns once instead of on every loop iteration.
    title_re = re.compile(r'<h4><a .*?>(.*?)</a></h4>')      # book title
    author_re = re.compile(r'<a class="name" .*?>(.*?)</a>')  # author name
    # [\s\S] matches any character including newlines, so intros that span
    # several lines are captured.
    intro_re = re.compile(r'<p class="intro">([\s\S]*?)</p>')
    span_re = re.compile(r'<span>(.*?)</span>')
    separator = "\n\n----------------------------------------------------------------------\n\n"

    # Open the output file once for the whole page rather than per write.
    with open(load, 'a', encoding='utf-8') as f:
        for i in range(1, 21):
            xl = soup.find_all("li", {"data-rid": str(i)})
            if not xl:
                # No entry with this data-rid on the page.
                continue
            html = str(xl)

            titles = title_re.findall(html)
            authors = author_re.findall(html)
            intros = intro_re.findall(html)
            spans = span_re.findall(html)
            # The second <span> of the entry is taken to be the word count.
            if not (titles and authors and intros and len(spans) > 1):
                continue

            content = ["《" + titles[0] + "》", authors[0], intros[0], spans[1]]
            f.write("\n".join(content))
            f.write(separator)
def geturl(num):
    """Crawl list pages 1 through *num* and record the books on each one.

    For every page number the list URL is built, fetched with gethtml, and
    handed to getbook together with the module-level output path ``load``.
    The function sleeps 2.5 seconds after each page.

    Args:
        num: Number of pages to crawl; converted with int().
    """
    template = "http://fin.qidian.com/?size=-1&sign=-1&tag=-1&chanId=-1&subCateId=-1&orderId=&update=-1&page=%d&month=-1&style=1&vip=0"
    last_page = int(num)
    page_no = 1
    while page_no <= last_page:
        parsed = gethtml(template % page_no)
        getbook(parsed, load)
        time.sleep(2.5)  # pause between page requests
        page_no += 1
60         
61         
62 geturl(num)  # script entry point: crawl the requested number of pages as soon as the file is run

实现

相关阅读:
使用PLSql连接Oracle时报错ORA-12541: TNS: 无监听程序
算法7-4：宽度优先搜索
R语言字符串函数
notepad++ 正则表达式
MySQL常用命令
linux下对符合条件的文件大小做汇总统计的简单命令
Linux系统下统计目录及其子目录文件个数
R: count number of distinct values in a vector
ggplot2 demo
R programming, In ks.test(x, y) : p-value will be approximate in the presence of ties

原文地址：https://www.cnblogs.com/jjj-fly/p/6896599.html