import json
import re
from datetime import datetime

import scrapy

# Matches a JSONP envelope such as ``data_callback(<payload>)`` with optional
# surrounding whitespace and an optional trailing semicolon; group 1 is the
# payload between the outermost parentheses.
_JSONP_RE = re.compile(r'^\s*[\w$.]+\s*\((.*)\)\s*;?\s*$', re.DOTALL)

# Format of the ``time`` field that ``parse`` feeds to ``strptime``.
_TIME_FORMAT = '%m/%d/%Y %H:%M:%S'


def _unwrap_jsonp(text):
    """Return the payload inside a JSONP ``callback(...)`` wrapper.

    ``str.strip(chars)`` removes any of the given *characters* from both
    ends rather than a prefix/suffix, and it silently leaves the closing
    parenthesis in place when trailing whitespace follows it.  Matching the
    envelope explicitly avoids both problems.

    Args:
        text: Decoded response body.

    Returns:
        The text between the outermost parentheses when ``text`` looks like
        ``name(...)``; otherwise ``text`` with surrounding whitespace removed.
    """
    match = _JSONP_RE.match(text)
    if match is None:
        return text.strip()
    return match.group(1)


class BianSpider(scrapy.Spider):
    """Spider that reads a JSONP article list and visits each article URL."""

    name = 'bian'
    start_urls = [
        'http://tech.163.com/special/00097UHL/tech_datalist.js?callback=data_callback'
    ]

    def parse(self, response):
        """Print each feed entry's fields and request its ``docurl``.

        The body is decoded as GBK, unwrapped from its JSONP callback and
        parsed as JSON.  For every entry the title, label, parsed time and
        comma-joined keyword names are printed.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            One ``scrapy.Request`` per entry, handled by ``show``.
        """
        payload = _unwrap_jsonp(response.body.decode('gbk'))
        try:
            entries = json.loads(payload)
        except ValueError:
            self.logger.error('Could not decode JSON from %s', response.url)
            return

        for entry in entries:
            print(entry['title'])
            print(entry['label'])

            raw_time = entry['time']
            try:
                print(datetime.strptime(raw_time, _TIME_FORMAT))
            except (TypeError, ValueError):
                # One malformed timestamp must not abort the whole callback
                # and drop the remaining entries; show the raw value instead.
                self.logger.warning('Unparseable time value: %r', raw_time)
                print(raw_time)

            print(','.join([kw['keyname'] for kw in entry['keywords']]))
            yield scrapy.Request(entry['docurl'], callback=self.show)

    def show(self, response):
        """Print selected text fragments extracted from an article page.

        Prints, in order: the space-joined text nodes under the
        ``post_crumb`` div, the text of the ``ne_article_source`` element,
        and the text of ``#endText/div[2]/span[2]``.

        Args:
            response: Response for an article URL yielded by ``parse``.
        """
        crumb_parts = response.xpath("//div[@class='post_crumb']//text()").extract()
        weizhi = ' '.join(crumb_parts).strip()
        print(weizhi)
        print(response.xpath('//*[@id="ne_article_source"]/text()').extract())
        print(response.xpath('//*[@id="endText"]/div[2]/span[2]/text()').extract())