• 使用爬虫抓取王者荣耀英雄皮肤


    1:创建爬虫项目

    scrapy startproject wzry

    2:创建爬虫

    scrapy genspider jishudaniu example.com

    3:启动爬虫

    scrapy crawl jishudaniu

    # -*- coding: utf-8 -*-
    import scrapy
    import os
    import urllib.request
    
    class JishudaniuSpider(scrapy.Spider):
        """Spider that downloads the skin images of every hero on pvp.qq.com.

        The crawl starts at the hero list page, follows each hero's detail
        link, and saves one ``.jpg`` per skin under
        ``<save_root>/<hero name>/<skin name>.jpg``.
        """

        name = "jishudaniu"

        # Starting point of the crawl: the hero list page.
        start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']

        # Root directory the images are written to. A raw string without a
        # trailing backslash is used because a literal ending in a backslash
        # would escape its own closing quote and fail to compile.
        save_root = r"E:\wzryimg"

        def parse(self, response):
            """Yield one request per hero detail page linked from the list page.

            Args:
                response: The downloaded hero list page.

            Yields:
                scrapy.Request objects whose callback is ``detial_parse``.
            """
            # A leading "//" in XPath matches the node anywhere in the document.
            hero_links = response.xpath(
                '//div[@class="herolist-box"]/div[@class="herolist-content"]/ul/li/a'
            )
            for link in hero_links:
                # "./" is relative to the current <a> node.
                hrefs = link.xpath('./@href').extract()
                if not hrefs:
                    # An anchor without href cannot be followed; skip it
                    # instead of crashing the whole callback.
                    continue
                # The hrefs are relative to the list page, so resolve them
                # against the response URL.
                yield scrapy.Request(response.urljoin(hrefs[0]), self.detial_parse)

        def detial_parse(self, response):
            """Download every skin image of the hero shown on a detail page.

            The hero name and numeric id are read from an inline script, the
            skin names from the ``data-imgname`` attribute of the skin list.
            Pages that do not have the expected structure are logged and
            skipped; a failed download is logged and the remaining skins are
            still attempted.

            Args:
                response: The downloaded hero detail page.
            """
            scripts = response.xpath('/html/body/script[10]/text()').extract()
            if not scripts:
                self.logger.warning("No hero info script found on %s", response.url)
                return

            # The script text is expected to hold two comma-separated
            # assignments of the form "<var> = '<value>'" -- the hero name
            # first, then the hero id (presumably; verify against the live page).
            fields = scripts[0].split(",")
            try:
                heroName = fields[0].replace("'", "").split(" = ")[1].strip()
                heroNo = (
                    fields[1].replace("'", "").replace(";", "").split(" = ")[1].strip()
                )
            except IndexError:
                self.logger.warning(
                    "Unexpected hero info script format on %s", response.url
                )
                return

            skin_attrs = response.xpath(
                '//div[@class="pic-pf"]/ul/@data-imgname'
            ).extract()
            if not skin_attrs:
                self.logger.warning("No skin list found on %s", response.url)
                return
            # Entries are separated by "|"; the skin name is the part before "&".
            skin_names = [entry.split("&")[0] for entry in skin_attrs[0].split("|")]

            hero_dir = os.path.join(self.save_root, heroName)
            os.makedirs(hero_dir, exist_ok=True)

            heroSkinLinksTemplate = (
                "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
                f"{heroNo}/{heroNo}-bigskin-"
            )
            # Image URLs are numbered from 1 in the same order as the skin names.
            for index, skin_name in enumerate(skin_names, start=1):
                image_url = f"{heroSkinLinksTemplate}{index}.jpg"
                fileName = os.path.join(hero_dir, skin_name + ".jpg")
                try:
                    # NOTE: urlretrieve blocks until the download finishes.
                    urllib.request.urlretrieve(image_url, filename=fileName)
                except OSError as err:
                    # URLError/HTTPError and local write errors all derive
                    # from OSError; keep going with the remaining skins.
                    self.logger.error("Failed to download %s: %s", image_url, err)
    

      

  • 相关阅读:
    JAVA LinkedList和ArrayList的使用及性能分析
    学习笔记—Node中的模块调试
    学习笔记—Node的核心模块
    学习笔记—Node中VM模块详解
    学习笔记—Node中require的实现
    入园了
    【引用】asp.net服务器推送(ServerPush)和客户端拉拽技术
    ajax xmlHttp.responseXML取不到值问题备忘
    oracle实时插值速度突然变慢问题解决办法
    [转帖 作者: fuyuncat 来源: www.HelloDBA.com ]Oracle IO问题解析
  • 原文地址:https://www.cnblogs.com/xqschool/p/14131026.html
Copyright © 2020-2023  润新知