• 2、Python requests、BeautifulSoup(download mm_pic)


    import os
    from urllib.parse import urljoin, urlsplit

    import requests
    from bs4 import BeautifulSoup
    class DownLoadImg(object):
        """Download the images embedded in the newest pages of jandan.net/ooxx.

        The newest page number is read from the landing page, then pages are
        walked backwards (newest first) and every ``<img>`` found on them is
        saved into the ``download_mm`` directory under the current working
        directory.
        """

        # Directory (relative to the current working directory) for saved images.
        SAVE_DIR = 'download_mm'

        def __init__(self):
            self.url = 'http://jandan.net/ooxx/'
            self.header = {
                'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36'}

        def get_Page(self):
            """Return the newest page number shown on the landing page.

            Returns:
                The page number as a string (the text of the
                ``span.current-comment-page`` element without its surrounding
                bracket characters).

            Raises:
                IndexError: if the landing page has no
                    ``span.current-comment-page`` element.
            """
            response = self.open_url(self.url)
            # Name the parser explicitly so the result does not depend on which
            # optional parsers happen to be installed.
            soup = BeautifulSoup(response.text, 'html.parser')
            markers = soup.select('span.current-comment-page')
            if not markers:
                raise IndexError(
                    'no span.current-comment-page element found on ' + self.url)
            # The element text looks like "[123]"; drop the first and last char.
            return markers[0].get_text().strip()[1:-1]

        def open_url(self, url, timeout=30):
            """GET ``url`` with the configured request headers.

            Args:
                url: address to fetch.
                timeout: seconds to wait for the server before giving up.

            Returns:
                The ``requests`` response object.
            """
            # The second positional argument of requests.get() is ``params``;
            # headers have to be passed by keyword or they end up in the query
            # string instead of the request headers.
            return requests.get(url, headers=self.header, timeout=timeout)

        @staticmethod
        def _resolve_src(page_url, src):
            """Turn an ``<img src>`` value into an absolute http(s) URL, or None."""
            if not src:
                return None
            # urljoin handles protocol-relative ("//host/a.jpg"), relative and
            # already-absolute values alike.
            absolute = urljoin(page_url, src.strip())
            if not absolute.startswith(('http://', 'https://')):
                # e.g. inline "data:" images, which cannot be fetched over HTTP.
                return None
            return absolute

        @staticmethod
        def _file_name(img_url):
            """Return the last path component of ``img_url`` (query string dropped)."""
            return os.path.basename(urlsplit(img_url).path)

        def get_and_save_Img(self):
            """Ask how many pages to fetch and save every image found on them.

            Images are written to ``download_mm``; the working directory of the
            process is left unchanged so the method can be called repeatedly.

            Raises:
                ValueError: if the entered page count is not an integer.
            """
            os.makedirs(self.SAVE_DIR, exist_ok=True)

            times = int(input('Please enter the number of pages to download:'))
            newest = int(self.get_Page())
            # Walk backwards from the newest page and stop at page 1 even when
            # more pages were requested than exist.
            for num in range(newest, max(newest - times, 0), -1):
                url = self.url + 'page-' + str(num)
                print(url)
                response = self.open_url(url)

                bs = BeautifulSoup(response.text, 'html.parser')
                for img in bs.select('img'):
                    src = self._resolve_src(url, img.get('src'))
                    if src is None:
                        continue
                    img_name = self._file_name(src)
                    if not img_name:
                        continue
                    try:
                        get_img = self.open_url(src)
                    except requests.RequestException as err:
                        # One unreachable image should not abort the whole run.
                        print('skipped ' + src + ': ' + str(err))
                        continue
                    if not get_img.ok:
                        # Do not store an HTTP error page under an image name.
                        continue

                    with open(os.path.join(self.SAVE_DIR, img_name), 'wb') as dl:
                        dl.write(get_img.content)
    
    if __name__ == "__main__":
        # Script entry point: build a downloader and start the interactive
        # download run.
        DownLoadImg().get_and_save_Img()
    

      

  • 相关阅读:
    ubuntu16.04下笔记本自带摄像头编译运行PTAM
    ar的主流算法
    ubuntu下安装meshlab
    打开.py文件的方法
    Python_ip代理
    对书名的抓取
    JS动态增加删除UL节点LI及相关内容示例
    js遍历json
    js移除某一类的div
    工厂模式
  • 原文地址:https://www.cnblogs.com/royfans/p/7417928.html
Copyright © 2020-2023  润新知