• 最简单的爬虫——小白


    import requests
    from lxml import etree
    import os
    # Simple wallpaper scraper: walks the numbered listing pages of
    # pic.netbian.com, opens every detail page linked from a listing, and saves
    # the preview image found there into a local directory.

    # Listing-page URL pattern; {} is replaced by the page number.
    start_url = "http://pic.netbian.com/index_{}.html"
    # Site root, used to turn the hrefs/srcs found in the HTML into full URLs.
    base_url = 'http://pic.netbian.com/'
    # Directory the images are written to.
    save_dir = './不知火'
    # Number of the last listing page (inclusive). The original loop used
    # range(1, 1169) and therefore stopped one page short.
    last_page = 1169
    # Seconds to wait for any single HTTP request before giving up on it.
    request_timeout = 10
    # Sent with every request; built once because it never changes.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'}
    # Characters that cannot appear in a file name (on Windows, and '/' on any
    # platform) are mapped to '_' so open() does not fail on such titles.
    unsafe_chars = str.maketrans({c: '_' for c in '\\/:*?"<>|'})
    # Running count of downloaded images (1-based, used in the progress line).
    num = 1

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    for page in range(1, last_page + 1):
        url = start_url.format(page)

        # A failed listing page is reported and skipped so that one bad page
        # does not abort the whole crawl.
        try:
            listing = requests.get(url, headers=headers, timeout=request_timeout)
            listing.raise_for_status()
        except requests.RequestException as exc:
            print('skipping listing page {}: {}'.format(url, exc))
            continue

        # The pages are decoded as GBK; undecodable bytes are replaced instead
        # of raising, since only the markup structure matters here.
        response = listing.content.decode('gbk', errors='replace')
        etree_html = etree.HTML(response)
        if etree_html is None:
            continue

        # class="slist" ul li a href -> links to the per-image detail pages
        xpath_url = etree_html.xpath('//div[@ class="slist"]/ul/li/a/@href')
        for k in xpath_url:
            # lstrip avoids a doubled slash when the href is root-relative.
            x_url = base_url + k.lstrip('/')

            try:
                detail = requests.get(x_url, headers=headers, timeout=request_timeout)
                detail.raise_for_status()
            except requests.RequestException as exc:
                print('skipping detail page {}: {}'.format(x_url, exc))
                continue

            inner_html = detail.content.decode('gbk', errors='replace')
            xpath_img_url = etree.HTML(inner_html)
            if xpath_img_url is None:
                continue

            # class="photo-pic" a img title
            img_titles = xpath_img_url.xpath('//div[@ class="photo-pic"]/a/img/@title')
            # class="photo-pic" a img src
            img_resources = xpath_img_url.xpath('//div[@ class="photo-pic"]/a/img/@src')

            for img_title, img in zip(img_titles, img_resources):
                img_content = base_url + img.lstrip('/')

                try:
                    image = requests.get(img_content, headers=headers, timeout=request_timeout)
                    image.raise_for_status()
                except requests.RequestException as exc:
                    print('skipping image {}: {}'.format(img_content, exc))
                    continue

                # NOTE(review): files are always saved with a .png suffix
                # regardless of the format the server returns - kept as in the
                # original; confirm whether the real extension should be used.
                file_name = img_title.translate(unsafe_chars)
                with open('{}/{}.png'.format(save_dir, file_name), 'wb') as f:
                    f.write(image.content)

                print('已完成{}下载, 第{}张图片'.format(img_title, num))
                num += 1
  • 相关阅读:
    SQL点滴34—SQL中的大小写
    js关闭和打开页面
    jquery处理checkbox
    CKeditor 配置使用
    .net4.0中的ClientIDMode
    FCKEditor使用说明
    SQL对Xml字段的操作 拓荒者
    c#中BackGroundWorker控件 拓荒者
    C#中DataGradView控件的常用操作 拓荒者
    JavaScript操作Xml 拓荒者
  • 原文地址:https://www.cnblogs.com/LQ970811/p/11821199.html
Copyright © 2020-2023  润新知