• 在 Scrapy 中使用 MongoDB 管道


    pipelines.py

    import json
    from scrapy.conf import settings
    from pymongo import MongoClient
    
    class SunPipeline(object):
        """Item pipeline that dumps every scraped item to ``dongguan.json``.

        Each item is written as one JSON object followed by a comma and a
        newline, so the file is a list of comma-terminated JSON lines rather
        than one valid JSON document.
        """

        def open_spider(self, spider):
            """Open the output file when the spider starts.

            UTF-8 is requested explicitly because ``process_item`` writes
            non-ASCII characters unescaped (``ensure_ascii=False``); relying
            on the platform default encoding could fail or produce a file
            that other tools cannot read.
            """
            self.file = open('dongguan.json', 'w', encoding='utf-8')

        def process_item(self, item, spider):
            """Serialize ``item`` as one JSON line, write it, and return it.

            Args:
                item: the scraped item; it is converted with ``dict(item)``.
                spider: the spider that produced the item (unused here).

            Returns:
                The same ``item`` object, unchanged.
            """
            str_data = json.dumps(dict(item), ensure_ascii=False) + ',\n'
            self.file.write(str_data)
            # Return the item so that any later pipeline still receives it.
            return item

        def close_spider(self, spider):
            """Close the output file when the spider finishes."""
            self.file.close()
    
    
    class MongoPipeline(object):
        """Item pipeline that stores every scraped item in a MongoDB collection.

        Connection parameters are read from the project settings keys
        ``MONGO_HOST``, ``MONGO_PORT``, ``MONGO_DBNAME`` and ``MONGO_COLNAME``.
        """

        def __init__(self):
            # Read the database parameters from the project settings.
            host = settings['MONGO_HOST']
            port = settings['MONGO_PORT']
            dbname = settings['MONGO_DBNAME']
            colname = settings['MONGO_COLNAME']

            # Connect to the database server.
            self.client = MongoClient(host, port)
            # Select the database.
            self.db = self.client[dbname]
            # Select the collection that items are written to.
            self.col = self.db[colname]

        def process_item(self, item, spider):
            """Insert ``item`` into the collection and return it.

            Args:
                item: the scraped item; it is converted with ``dict(item)``.
                spider: the spider that produced the item (unused here).

            Returns:
                The same ``item`` object that was passed in.
            """
            # Insert a plain-dict copy, so whatever the driver adds to the
            # stored document does not end up on the item itself.
            data = dict(item)

            # ``insert_one`` replaces the legacy ``Collection.insert`` call,
            # which newer PyMongo releases no longer provide.
            self.col.insert_one(data)

            return item

        def close_spider(self, spider):
            """Close the database connection when the spider finishes.

            This replaces the earlier ``__del__`` cleanup. A finalizer runs at
            an unpredictable time, and would itself fail if ``__init__`` had
            raised before ``self.client`` was assigned.
            """
            self.client.close()
    

      

    settings.py

    # Project (bot) name.
    BOT_NAME = "Sun"

    # Modules that hold the project's spiders.
    SPIDER_MODULES = ["Sun.spiders"]
    NEWSPIDER_MODULE = "Sun.spiders"

    # MongoDB connection parameters read by MongoPipeline.
    MONGO_HOST = "127.0.0.1"
    MONGO_PORT = 27017
    MONGO_DBNAME = "Sun"
    MONGO_COLNAME = "dongguan"

    # Enabled item pipelines, each mapped to its order value.
    ITEM_PIPELINES = {
        "Sun.pipelines.SunPipeline": 300,
        "Sun.pipelines.MongoPipeline": 301,
    }
    

      

  • 相关阅读:
    Python常用第三方库总结
    Python爬虫技术--入门篇--爬虫介绍
    X sql解惑 25 里程碑问题 答案
    X sql解惑 34 咨询顾问收入问题
    从小变大的照片
    获取属性的顺序
    for...in
    判断元素是否存在
    自由的元素名称
    ES6语法糖-简洁属性表示
  • 原文地址:https://www.cnblogs.com/andy9468/p/8300029.html
Copyright © 2020-2023  润新知