• python爬煎蛋妹子图


     1 # python3
     2 # jiandan meizi tu
     3 import urllib
     4 import urllib.request as req
     5 import os
     6 import time
     7 import random
     8 
     9 
    10 def url_open(url):
    11     req1 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.0'})
    12     req2 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.1'})
    13     req3 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.5'})
    14     req4 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.1'})
    15     
    16     req_list = [req1, req2,req3, req4]
    17     response = urllib.request.urlopen(random.choice(req_list))
    18     html = response.read()
    19     # print ('url_open done!')
    20     return html
    21 
    22 def url_open2(url):
    23     req1 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.0'})
    24     req2 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.1'})
    25     req3 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/4.5'})
    26     req4 = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.1'})
    27     req_list = [req1, req2,req3, req4]
    28 
    29     ip_list = ['117.135.251.136:82']
    30     ip = random.choice(ip_list)
    31     print (ip)
    32 
    33     proxy = req.ProxyHandler({'http': ip})
    34     # auth = req.HTTPBasicAuthHandler()
    35     opener = req.build_opener(proxy, req.HTTPHandler)
    36     req.install_opener(opener)
    37     conn = req.urlopen(random.choice(req_list))
    38     return_str = conn.read()
    39     return return_str
    40 
    41 def get_current_page(url):
    42     html = url_open2(url).decode('utf-8')
    43     a = html.find('current-comment-page') + 23
    44     b = html.find(']',a)
    45     return html[a:b]
    46 
    47 def find_imgs(url):
    48     html = url_open2(url).decode('utf-8')
    49     img_addrs = []
    50     a = html.find('img src="http')
    51     while a != -1:        
    52         b = html.find('.jpg',a, a+255)
    53         if b != -1:
    54             img_addrs.append(html[a+9:b+4])
    55         else:
    56             b = a + 13
    57         a = html.find('img src="http', b)
    58     return img_addrs
    59 
    60 def save_imgs(folder,img_addrs):
    61     for each in img_addrs:
    62         filename = each.split('/')[-1]
    63         with open(filename,'wb') as f:
    64             img = url_open2(each)
    65             f.write(img)
    66 
    67 
    68 def download_mm(folder = 'xx',pages = 300):
    69     # os.mkdir(folder)
    70     os.chdir(folder)
    71     
    72     url = 'http://jandan.net/ooxx/'
    73     current_page_num = int(get_current_page(url))
    74     for i in range(pages):
    75         print (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),'current_page_num', current_page_num)
    76         if i%3 == 0:
    77             print (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),"sleep 2 seconds...")
    78             time.sleep(2)
    79         current_page_num -= 1
    80         page_url = url + 'page-' + str(current_page_num) + '#comments'
    81         img_addrs = find_imgs(page_url)
    82         save_imgs(folder, img_addrs)
    83 
    84 if __name__ == '__main__':
    85     download_mm()
  • 相关阅读:
    装饰模式
    You can't specify target table 'a' for update in FROM clause
    Spring事务知识点
    JAVA中的volatile关键字
    验证HashSet和HashMap不是线程安全
    ZYNQ7000 通过FPGA Manager加载比特流
    verilog中可综合的task使用
    verilog条件编译
    Vivado debug异常现象
    Matlab相关函数使用
  • 原文地址:https://www.cnblogs.com/duanguyuan/p/5208586.html
Copyright © 2020-2023  润新知