• 使用爬虫抓取王者荣耀英雄皮肤


    1:创建爬虫项目

    scrapy startproject wzry

    2:创建爬虫

    scrapy genspider jishudaniu example.com

    3:启动爬虫

    scrapy crawl jishudaniu

    # -*- coding: utf-8 -*-
    import scrapy
    import os
    import urllib.request
    
    class JishudaniuSpider(scrapy.Spider):
        """Download every skin image of every hero listed on pvp.qq.com.

        ``parse`` walks the hero list page and schedules one request per hero
        detail page; ``detial_parse`` reads the hero name, hero number and skin
        names from the detail page and saves each skin image to local disk.
        """

        name = "jishudaniu"
        # allowed_domains = ["example.com"]

        # Entry point of the crawl: the hero list page.
        start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']

        def parse(self, response):
            """Yield one request per hero detail page linked from the hero list.

            Args:
                response: Scrapy response for the hero list page.

            Yields:
                scrapy.Request for each hero detail page, handled by
                ``detial_parse``.
            """
            host_name = "https://pvp.qq.com/web201605/"
            # A leading "//" matches at any depth of the HTML document.
            hero_list = response.xpath(
                '//div[@class="herolist-box"]/div[@class="herolist-content"]/ul/li/a'
            )
            for link in hero_list:
                # "./" is relative to the current <a> node.
                href = link.xpath('./@href').extract_first()
                if not href:
                    # Anchor without an href: nothing to follow.
                    continue
                yield scrapy.Request(host_name + href, self.detial_parse)

        def detial_parse(self, response):
            """Save all skin images of the hero shown on a detail page.

            The hero name and number are parsed out of the text of the 10th
            <script> element under <body>; skin names come from the
            ``data-imgname`` attribute of the skin list. Images are written to
            ``E:\\wzryimg\\<hero name>\\<skin name>.jpg``.

            Args:
                response: Scrapy response for one hero detail page.
            """
            message = response.xpath('/html/body/script[10]/text()').extract_first()
            skins = response.xpath('//div[@class="pic-pf"]/ul/@data-imgname').extract_first()
            if not message or not skins:
                self.logger.warning("Unexpected page layout, skipping %s", response.url)
                return

            fields = message.split(",")
            try:
                heroName = fields[0].replace("'", "").split(" = ")[1]
                heroNo = fields[1].replace("'", "").replace(";", "").split(" = ")[1].strip()
            except IndexError:
                self.logger.warning("Cannot parse hero info on %s", response.url)
                return

            heroSkinLinksTemplate = f"https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{heroNo}/{heroNo}-bigskin-"

            # Backslashes must be escaped: a lone trailing "\" would escape the
            # closing quote and leave the string literal unterminated.
            filePath = "E:\\wzryimg\\"
            heroDir = filePath + heroName
            os.makedirs(heroDir, exist_ok=True)

            # data-imgname looks like "name&x|name&x|..."; only the part before
            # "&" is used as the skin name. Image URLs are numbered from 1 in
            # the same order as the skins appear.
            for index, skin in enumerate(skins.split("|"), start=1):
                skinname = skin.split("&")[0]
                # os.sep is the platform path separator.
                fileName = f"{heroDir}{os.sep}{skinname}.jpg"
                url = f"{heroSkinLinksTemplate}{index}.jpg"
                self.logger.debug("Downloading %s", url)
                try:
                    # NOTE: urlretrieve blocks until the download completes.
                    urllib.request.urlretrieve(url, filename=fileName)
                except urllib.error.URLError as err:
                    # One missing image should not abort the remaining skins.
                    self.logger.warning("Failed to download %s: %s", url, err)
    

      

  • 相关阅读:
    HTML5+php图片自由裁剪上传功能
    一个日期时间显示框的美化风格示例
    PHP+jquery 瀑布流+LightBox图片盒子特效
    类型
    异常语句
    穷举
    练习
    累加求和
    猜拳游戏(三局两胜)
    正则表达式
  • 原文地址:https://www.cnblogs.com/xqschool/p/14131026.html
Copyright © 2020-2023  润新知