数据存储 Json
一、JsonLinesItemExporter保存json
from scrapy.exporters import JsonLinesItemExporter


class JsonLinesItemExporterPipeline(object):
    """Item pipeline that writes every scraped item to ``jsonfile.json``.

    Serialisation is delegated to Scrapy's ``JsonLinesItemExporter``.
    """

    def __init__(self):
        # The exporter is handed a binary-mode file (it does the encoding
        # itself via the ``encoding`` argument).
        self.file = open('jsonfile.json', 'wb')
        self.exporter = JsonLinesItemExporter(
            self.file, ensure_ascii=False, encoding='utf-8'
        )

    def process_item(self, item, spider):
        """Export ``item`` and pass it on to the next pipeline stage.

        Args:
            item: The scraped item to export.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged.
        """
        self.exporter.export_item(item)
        print(item)
        # A pipeline must return the item; otherwise later pipeline
        # stages would receive None instead of the scraped data.
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes.

        Args:
            spider: The spider being closed (unused).
        """
        self.file.close()

    # Backward-compatible alias: the method used to be called ``close_item``,
    # a name Scrapy never invokes, so the file was left open.
    close_item = close_spider
二、自定义方法保存json文件
import json


class QsbkDemoPipeline(object):
    """Hand-written pipeline that saves items to ``qsbk.json``.

    Each item is serialised with ``json.dumps`` and written on its own line.
    """

    def __init__(self):
        self.file = open('qsbk.json', 'w', encoding='utf-8')

    def open_spider(self, spider):
        """Announce that the crawl has started.

        Args:
            spider: The spider being opened (unused).
        """
        print('爬虫开始了...')

    def process_item(self, item, spider):
        """Write ``item`` as one JSON line and pass it on.

        Args:
            item: The scraped item; anything ``dict()`` accepts.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged.
        """
        # The item is converted to a plain dict before being serialised.
        item_json = json.dumps(dict(item), ensure_ascii=False)
        # Terminate every record with a newline so the file can be read back
        # one JSON object per line; a space separator produced a single
        # unparseable line.
        self.file.write(item_json + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file and announce that the crawl has ended.

        Args:
            spider: The spider being closed (unused).
        """
        self.file.close()
        print('爬虫结束了...')
三、JsonItemExporter保存json
from scrapy.exporters import JsonItemExporter


class JsonExporterPipeline(object):
    """Pipeline that saves items to ``qsbk_1.json`` with Scrapy's own exporter."""

    def __init__(self):
        # Binary mode: the exporter is handed a byte stream.
        sink = open('qsbk_1.json', 'wb')
        json_exporter = JsonItemExporter(
            sink,
            ensure_ascii=False,
            encoding='utf-8',
        )
        self.file = sink
        self.exporter = json_exporter
        # Begin the export before any item arrives.
        json_exporter.start_exporting()

    def open_spider(self, spider):
        """Print a start-of-crawl message."""
        print('爬虫开始')

    def process_item(self, item, spider):
        """Export ``item`` and return it for the next pipeline stage."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export, then close the output file."""
        self.exporter.finish_exporting()
        self.file.close()