items.py

import scrapy


class InsistItem(scrapy.Item):
    positionname = scrapy.Field()  # job title
    type = scrapy.Field()          # business group (BG) name
    place = scrapy.Field()         # work location
    mian = scrapy.Field()          # job category
    time = scrapy.Field()          # last update time

pipelines.py

import pymysql


class InsistPipeline(object):
    def __init__(self):
        # Connect to the local MySQL database; adjust credentials for your environment.
        self.db = pymysql.connect(host='localhost', user='dsuser', passwd='badpassword',
                                  db='dsdb', charset='utf8', port=3306)
        self.cur = self.db.cursor()

    def process_item(self, item, spider):
        # Parameterized INSERT: pymysql handles quoting/escaping of the values.
        sql = 'INSERT INTO job(name, type, place, mian, time) VALUES(%s, %s, %s, %s, %s)'
        self.cur.execute(sql, (item['positionname'], item['type'], item['place'],
                               item['mian'], item['time']))
        self.db.commit()
        return item

    def close_spider(self, spider):
        self.cur.close()
        self.db.close()

insists.py  # the spider

import json

import scrapy

from insist.items import InsistItem


class InsistsSpider(scrapy.Spider):
    name = 'insists'
    allowed_domains = ['careers.tencent.com']
    # The search page is rendered by JavaScript, so we query the JSON API instead:
    # start_urls = ['https://careers.tencent.com/search.html?index=']
    baseURL = 'https://careers.tencent.com/tencentcareer/api/post/Query?pageSize=10&pageIndex='
    offset = 1
    start_urls = [baseURL + str(offset)]

    def parse(self, response):
        contents = json.loads(response.text)
        jobs = contents['Data']['Posts']
        for job in jobs:
            # Create a fresh item for each job; reusing one instance across yields
            # can clobber items that are still queued for the pipeline.
            item = InsistItem()
            item['positionname'] = job['RecruitPostName']
            item['type'] = job['BGName']
            item['place'] = job['LocationName']
            item['mian'] = job['CategoryName']
            item['time'] = job['LastUpdateTime']
            yield item  # hand the item to the pipeline, then keep crawling

        # Follow the next page, with a hard-coded limit on how far to paginate.
        if self.offset <= 5:
            self.offset += 1
            url = self.baseURL + str(self.offset)
            yield scrapy.Request(url, callback=self.parse)
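For InsistPipeline to actually receive items, it must be registered in the project's settings.py. A minimal sketch, assuming the Scrapy project package is named insist (as the from insist.items import suggests); the priority value and delay are just reasonable defaults:

settings.py

# Register the MySQL pipeline. 300 is an arbitrary priority; lower numbers run first.
ITEM_PIPELINES = {
    'insist.pipelines.InsistPipeline': 300,
}

# Optional but polite: throttle requests to the API (assumption, tune as needed).
DOWNLOAD_DELAY = 1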
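The pipeline also assumes a job table already exists in dsdb. Here is a one-off sketch that creates it with pymysql; the column names mirror the INSERT statement in pipelines.py, but the types and lengths are assumptions, since the API fields are plain strings:

create_table.py  # hypothetical helper, not part of the original project

import pymysql

# Assumed schema matching INSERT INTO job(name, type, place, mian, time).
SCHEMA = """
CREATE TABLE IF NOT EXISTS job (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255),
    type VARCHAR(64),
    place VARCHAR(64),
    mian VARCHAR(64),
    time VARCHAR(32)
)
"""

db = pymysql.connect(host='localhost', user='dsuser', passwd='badpassword',
                     db='dsdb', charset='utf8', port=3306)
try:
    with db.cursor() as cur:
        cur.execute(SCHEMA)
    db.commit()
finally:
    db.close()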