• 爬取7160美女图片


    #coding=utf-8
    
    import urllib.request
    from bs4 import BeautifulSoup
    from urllib import error
    import re
    # Candidate names, presumably category path segments of the target site
    # (TODO confirm). NOTE(review): this list is not referenced anywhere else
    # in the visible code; the crawl loop hard-codes "xiaohua" in its URL.
    ls = ['zhenrenxiu','meinv',"lianglichemo",'rentiyishu','xiaohua']
    def validateTitle(title):
        """Return *title* made safe for use as a file name.

        Every slash, backslash, colon, asterisk, question mark, double quote,
        angle bracket and vertical bar is replaced by an underscore. All other
        characters are left untouched.

        Args:
            title: The text to sanitise.

        Returns:
            The sanitised string (same length as the input).
        """
        # Single-quoted raw string so the double quote can appear in the class
        # unescaped; the doubled backslash matches one literal backslash.
        rstr = r'[/\\:*?"<>|]'
        new_title = re.sub(rstr, "_", title)
        return new_title
    
    # Crawl gallery ids 1..59999 under the "xiaohua" path. For each gallery,
    # read a page count from the landing page, then visit each page and save
    # the first <img> it contains, naming the file after the page's <h1> text.

    # Compiled once: extracts the first run of digits (the page count).
    pattern = re.compile(r'\d+')

    for j in range(1, 60000):
        url_origin = "http://www.7160.com/xiaohua/" + str(j)
        try:
            # Parse inside the `with` so the HTTP response is always closed.
            with urllib.request.urlopen(url_origin) as page_obj:
                page_soup = BeautifulSoup(page_obj, 'lxml')

            # NOTE(review): the search pattern below is empty, so this selects
            # the first text node of the document. It looks like the intended
            # marker text was lost; confirm what should be matched here.
            total_page_obj = page_soup.find(text=re.compile('')).string
            match = pattern.search(total_page_obj)

            if match is None:
                total_page = 0
            else:
                total_page = int(match.group())

            # NOTE(review): the upper bound is exclusive, so the page numbered
            # `total_page` is never requested. Kept as in the original; confirm
            # whether the last page should be included.
            for i in range(1, total_page):
                if i == 1:
                    url = url_origin + "/index.html"
                else:
                    url = url_origin + "/index_" + str(i) + ".html"
                request = urllib.request.Request(url)
                try:
                    with urllib.request.urlopen(request) as res:
                        soup = BeautifulSoup(res, 'lxml')
                    title_obj = soup.find(attrs={"class": "picmainer"})

                    # Pages without the "picmainer" container are skipped.
                    if title_obj is None:
                        continue

                    print(url)
                    title = title_obj.h1.string
                    content = soup.find('img')
                    src = content.get("src")

                    file_name = validateTitle(title) + ".jpg"
                    urllib.request.urlretrieve(src, "D://img2/" + file_name)
                    print(file_name + "保存成功")
                except Exception as e:
                    # Best-effort crawl: report the failing page and carry on.
                    print("异常" + str(j), url, repr(e))
        except Exception as e:
            # Best-effort crawl: report the failing gallery and carry on.
            print("异常" + str(j), url_origin, repr(e))
  • 相关阅读:
    POJ 3411 Paid Roads(DFS)
    POJ 1699 Best Sequence(DFS)
    Codeforces Round #191 (Div. 2)
    Windows && Linux 搭建python开发环境
    zabbix 源码编译安装
    智能运维基础设施
    Redis
    ubuntu16.04 安装 mysql
    Python必须知道的基础语法
    ubuntu && CentOS && RedHat 离线安装docker
  • 原文地址:https://www.cnblogs.com/brady-wang/p/8321709.html
Copyright © 2020-2023  润新知