import pymongo
# Connection settings read by MongoPipeline.__init__.
db_configs = {
    "type": "mongo",
    "host": "127.0.0.1",
    # Kept as a string: it is interpolated into the mongodb:// URI as-is.
    "port": "27017",
    # Empty user/password means no credentials are configured.
    "user": "",
    "password": "",
    # Database that receives the scraped items.
    "db_name": "spider",
}
class MongoPipeline:
    """Item pipeline that upserts items into MongoDB, keyed by their URL.

    Connection settings come from the module-level ``db_configs`` dict.
    The ``open_spider`` / ``close_spider`` / ``process_item`` hooks follow
    the Scrapy item-pipeline method names. Each spider writes to a
    collection named after ``spider.name``.
    """

    def __init__(self):
        """Read connection settings; no network connection is made here."""
        self.db_name = db_configs.get("db_name")
        self.host = db_configs.get("host")
        self.port = db_configs.get("port")
        self.username = db_configs.get("user")
        # The settings dict spells this key "password". "passwd" is still
        # accepted as a fallback so older configurations keep working.
        self.password = db_configs.get("password") or db_configs.get("passwd")
        # Populated by open_spider(); None means "not connected".
        self.client = None
        self.db = None

    def open_spider(self, spider):
        """Create the MongoDB client and select the target database.

        Credentials are only sent when both user and password are set.
        They are authenticated against ``db_name``.
        """
        options = {"connect": False, "maxPoolSize": 10}
        if self.username and self.password:
            # Database.authenticate() no longer exists in PyMongo 4, so
            # credentials are handed to the client instead.
            options.update(
                username=self.username,
                password=self.password,
                authSource=self.db_name,
            )
        uri = "mongodb://{}:{}".format(self.host, self.port)
        self.client = pymongo.MongoClient(uri, **options)
        self.db = self.client[self.db_name]

    def close_spider(self, spider):
        """Close the client if one was opened; safe to call repeatedly."""
        if self.client is not None:
            self.client.close()
            self.client = None
            self.db = None

    def process_item(self, item, spider):
        """Insert or update ``item`` in the spider's collection.

        The document is matched on its ``url`` field and created when no
        match exists (upsert).

        Returns:
            The same ``item`` object, unchanged.

        Raises:
            KeyError: if ``item`` has no ``"url"`` entry.
        """
        # Plain-dict copy: keeps the caller's item untouched and gives the
        # driver a standard mapping to encode.
        document = dict(item)
        collection = self.db[spider.name]
        collection.update_one(
            {"url": document["url"]},
            {"$set": document},
            upsert=True,
        )
        return item