1:创建爬虫项目
scrapy startproject wzry
2:创建爬虫
scrapy genspider jishudaniu example.com
3:启动爬虫
scrapy crawl jishudaniu
# -*- coding: utf-8 -*-
import os
import urllib.request

import scrapy


class JishudaniuSpider(scrapy.Spider):
    """Crawl the hero list on pvp.qq.com and download each hero's skin images.

    ``parse`` walks the hero list page and schedules one request per hero
    detail page; ``detial_parse`` reads the hero name, hero number and skin
    names from that page and saves one ``.jpg`` per skin under
    ``<image_root><hero name>/<skin name>.jpg``.
    """

    name = "jishudaniu"
    # allowed_domains = ["example.com"]
    # Starting point of the crawl: the hero list page.
    start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']

    # Root directory for downloaded images. The backslashes are escaped:
    # written as "E:\wzryimg\" the final backslash escapes the closing quote
    # and the literal never terminates.
    image_root = "E:\\wzryimg\\"

    def parse(self, response):
        """Yield one request per hero detail page linked from the hero list.

        Args:
            response: Scrapy response for the hero list page.

        Yields:
            scrapy.Request: request for a hero detail page, handled by
            ``detial_parse``.
        """
        host_name = "https://pvp.qq.com/web201605/"
        # A leading `//` matches nodes anywhere in the HTML document.
        hero_list = response.xpath(
            '//div[@class="herolist-box"]/div[@class="herolist-content"]/ul/li/a'
        )
        for link in hero_list:
            # `./` is relative to the current <a> node.
            href = link.xpath('./@href').extract_first()
            if not href:
                # An anchor without an href cannot be followed; skip it
                # instead of raising IndexError.
                continue
            yield scrapy.Request(host_name + href, self.detial_parse)

    def detial_parse(self, response):
        """Download every skin image of the hero shown on a detail page.

        The hero name and number are read from the tenth ``<script>`` element
        of ``<body>``; the skin names come from the ``data-imgname`` attribute
        of the ``pic-pf`` list. Pages that lack any of these are logged and
        skipped.

        Args:
            response: Scrapy response for a hero detail page.
        """
        message = response.xpath('/html/body/script[10]/text()').extract_first()
        if not message:
            self.logger.warning("No hero script found on %s", response.url)
            return

        # The code below expects two comma-separated `<name> = '<value>'`
        # assignments: hero name first, hero number second.
        fields = message.split(",")
        try:
            hero_name = fields[0].replace("'", "").split(" = ")[1]
            hero_no = (
                fields[1].replace("'", "").replace(";", "").split(" = ")[1].strip()
            )
        except IndexError:
            self.logger.warning("Unexpected hero script format on %s", response.url)
            return

        skins = response.xpath(
            '//div[@class="pic-pf"]/ul/@data-imgname'
        ).extract_first()
        if not skins:
            self.logger.warning("No skin list found on %s", response.url)
            return
        # Entries are separated by "|"; the skin name is the part before "&".
        skin_names = [skin.split("&")[0] for skin in skins.split("|")]

        hero_skin_links_template = (
            "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
            f"{hero_no}/{hero_no}-bigskin-"
        )

        hero_dir = self.image_root + hero_name
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(hero_dir, exist_ok=True)

        # Skin images are numbered from 1 in the order the names are listed.
        for index, skin_name in enumerate(skin_names, start=1):
            image_url = f"{hero_skin_links_template}{index}.jpg"
            file_name = os.path.join(hero_dir, skin_name + ".jpg")
            self.logger.debug("Downloading %s -> %s", image_url, file_name)
            # NOTE(review): urlretrieve is a blocking call made inside a Scrapy
            # callback; consider an images pipeline if throughput matters.
            try:
                urllib.request.urlretrieve(image_url, filename=file_name)
            except OSError as exc:
                # URLError/HTTPError subclass OSError; keep going so one bad
                # image does not abort the remaining skins of this hero.
                self.logger.warning("Failed to download %s: %s", image_url, exc)