Python 爬取煎蛋网妹子图片

 1 #!/usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 # @Date    : 2017-08-24 10:17:28
 4 # @Author  : EnderZhou (zptxwd@gmail.com)
 5 # @Link    : http://www.cnblogs.com/enderzhou/
 6 # @Version : $Id$
 7 
 8 import requests
 9 from bs4 import BeautifulSoup as bs
10 import threading
11 import Queue
12 import urllib
13 
class jiandan_ooxx(threading.Thread):
    """Worker thread that downloads the pictures found on queued pages.

    Each worker keeps taking page URLs from a shared queue, parses the page
    with BeautifulSoup and saves every accepted <img> into the current
    working directory, named after the last path segment of its URL.
    """

    def __init__(self, queue):
        """Remember the shared queue of page URLs.

        :param queue: object providing ``get_nowait()`` that raises
            ``Queue.Empty`` once no page URL is left.
        """
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        """Crawl page URLs until the shared queue is drained."""
        while True:
            # The queue is shared between workers, so checking empty() and
            # then calling get_nowait() can race: another thread may take the
            # last item in between. Ask for the item directly and treat
            # Queue.Empty as the stop signal instead.
            try:
                url = self._queue.get_nowait()
            except Queue.Empty:
                break
            self.spider(url)

    def spider(self, url):
        """Fetch one page and download every picture referenced on it.

        :param url: address of the page to scan for <img> tags.
        """
        # Function-scope import keeps this block self-contained; ``urlparse``
        # is the Python 2 standard-library home of urljoin, matching the rest
        # of this Python 2 script.
        from urlparse import urljoin

        r = requests.get(url=url)
        soup = bs(r.content, 'html.parser')
        for tag in soup.find_all(name='img', attrs={}):
            markup = str(tag)
            # Tags whose markup mentions "border" are not downloaded.
            if 'border' in markup:
                continue
            # When the markup mentions "onload" the address is read from
            # org_src instead of src (presumably a lazy-loaded image whose
            # src is only a placeholder -- confirm against the live page).
            key = 'org_src' if 'onload' in markup else 'src'
            src = tag.get(key)
            if not src:
                # No usable address on this tag; skip it rather than letting
                # a KeyError abort the rest of the page.
                continue
            print(src)
            # urljoin turns protocol-relative ("//host/x.jpg") and relative
            # sources into full URLs and leaves absolute ones untouched,
            # where blindly prepending "http:" would corrupt the latter.
            img = urljoin(url, src)
            name = img.split('/')[-1]
            urllib.urlretrieve(img, filename=name)
42 
def main(number, page_count=10):
    """Download the pictures of the newest pages with a pool of threads.

    :param number: number of the newest page; crawling starts there and
        walks backwards towards page 1.
    :param page_count: how many pages to crawl, newest first (default 10).
        Pass ``None`` to crawl every page down to page 1.
    """
    url = 'http://jandan.net/ooxx/page-'
    queue = Queue.Queue()

    # The stop value of range() is exclusive. Clamping it at 0 keeps the
    # lowest queued page at 1, so a small ``number`` never produces page 0
    # or negative page numbers.
    if page_count is None:
        stop = 0
    else:
        stop = max(number - page_count, 0)
    for i in range(number, stop, -1):
        queue.put(url + str(i))

    thread_count = 10
    threads = [jiandan_ooxx(queue) for _ in range(thread_count)]

    # Start every worker first, then wait for all of them to finish.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
61 
if __name__ == '__main__':
    # Read the newest page number from the landing page and pass it to main().
    landing = requests.get('http://jandan.net/ooxx')
    page = bs(landing.content, 'html.parser')
    markers = page.find_all(name='span', attrs={'class': 'current-comment-page'})
    # The second matching span is used. Its text has one wrapping character
    # on each side (presumably brackets such as "[123]" -- confirm against
    # the live page), which the slice strips before the int conversion.
    label = markers[1].string
    main(int(label[1:-1]))

相关阅读:
谁是你心目中最优秀的ajax框架
23种设计模式（1）：单例模式
23种设计模式（8）：观察者模式
设计模式六大原则（3）：依赖倒置原则
23种设计模式（2）：工厂方法模式
oracle中给表和字段添加注释
单例模式讨论篇：单例模式与垃圾回收
设计模式六大原则（6）：开闭原则
mysql命名锦集
23种设计模式（3）：抽象工厂模式

原文地址：https://www.cnblogs.com/enderzhou/p/7422441.html