一、MongoDB介绍
MongoDB 是一个基于分布式文件存储的数据库。由 C++ 语言编写。旨在为 WEB 应用提供可扩展的高性能数据存储解决方案。MongoDB 是一个介于关系数据库和非关系数据库之间的产品,是非关系数据库当中功能最丰富,最像关系数据库的。
在做爬虫的时候,抓取到的数据通常是字典结构的结构化信息。存储时不需要预先关心表的结构,可以直接把字典动态地插入到MongoDB中。MongoDB以类似JSON的文档形式保存数据,每个文档由若干key-value(键值对)组成。
二、MongoDB安装(Windows下安装)
网盘下载:https://pan.baidu.com/s/19HfmwU0ibx2qMsF8E-8DWw 提取码:m03v
MongoDB可视化工具:Robo 3T,下载链接:https://robomongo.org/download
三、Python安装pymongo(MongoDB的Python驱动)
pip install pymongo -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
四、python操作MongoDB
4.1 连接MongoDB
import pymongo

mongo_host = "localhost"
mongo_db = "mydb"
mongo_table = "people_info"

# Connect to MongoDB.
client = pymongo.MongoClient(mongo_host)

# Use the "mydb" database (client.mydb is an equivalent spelling);
# it is created if it does not exist yet.
db = client[mongo_db]

# Select the collection (the "table"); db.people_info is an equivalent spelling.
table = db[mongo_table]
4.2 往MongoDB插入一条数据
info = {
    "name": "耗子尾汁",
    "age": 20,
    "sex": "girl",
    "address": "北京海淀",
}

# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
# insert_one() is the supported call for inserting a single document.
table.insert_one(info)
4.3 往MongoDB插入多条数据
results = {
    "chinese": "优秀",
    "English": "良好",
    "math": "不及格",
    "history": "优秀",
    "political": "优秀",
    "geographic": "极差",
}

# Inserting a dict adds an "_id" key to it in place, so if `info` was already
# inserted earlier, inserting the same dict again would fail with a
# duplicate-key error. Insert a copy without "_id" instead.
documents = [
    {key: value for key, value in info.items() if key != "_id"},
    results,
]

# Collection.insert() was removed in PyMongo 4; insert_many() is the
# supported call for inserting a list of documents.
table.insert_many(documents)
4.4 查询数据
# Query a single document.
res = table.find_one({'sex':'girl'})
# Example output:
# {'_id': ObjectId('5fdc76e1404d12a17a0c438a'), 'name': '耗子尾汁', 'age': 20, 'sex': 'girl', 'address': '北京海淀'}
print(res)

# Query all documents; find() returns a cursor, not a list.
results = table.find()
# Prints: <class 'pymongo.cursor.Cursor'>
print(type(results))

# Iterate over a fresh cursor and print every document.
for doc in table.find():
    print(doc)
4.5 更新数据
# Change sex from "girl" to "boy": the first dict is the filter that selects
# the document, the second dict describes the modification to apply.
# Collection.update() was removed in PyMongo 4; update_one() performs the same
# single-document update that update() did by default.
table.update_one({"sex": "girl"}, {"$set": {"sex": "boy"}})
4.6 删除数据
# Collection.remove() was removed in PyMongo 4; delete_many() replaces it.

# Delete every document in the collection (an empty filter matches all).
table.delete_many({})

# Delete the documents whose sex is "girl".
table.delete_many({"sex": "girl"})

# Drop the collection itself, not just its documents - use with great care.
# table.drop()
五、封装python操作MongoDB
import logging
import sys

import pymongo

logger = logging.getLogger(__name__)


class ConnectMongo(object):
    """Small convenience wrapper around one MongoDB database.

    Every data method takes the name of the collection to work on, so a single
    instance can be used with any collection of the database selected at
    construction time.

    Error policy: arguments of the wrong type raise ``TypeError``; a failure
    of the database call itself is logged (with traceback) and swallowed, and
    the method then returns ``None``.
    """

    def __init__(self, host="localhost", db='mydb'):
        """Create the client and select the database.

        :param host: host name (or URI) passed to ``pymongo.MongoClient``.
        :param db: name of the database to use.
        """
        self.__host = host
        self.__db = db
        # No try/except here on purpose: an instance without ``self.db`` would
        # fail on every later call, so a constructor error must reach the
        # caller instead of being printed and ignored.
        client = pymongo.MongoClient(self.__host)
        self.db = client[self.__db]

    def use_collection(self, collection):
        """Return the collection object named *collection* from ``self.db``.

        :param collection: collection name.
        :return: the collection object.
        """
        return self.db[collection]

    def insert_one_data(self, data, collection):
        """Insert a single document.

        :param data: document to insert.
        :param collection: name of the target collection.
        :return: None
        """
        try:
            self.use_collection(collection).insert_one(data)
        except Exception:
            logger.exception("insert_one into %r failed", collection)

    def insert_many_data(self, documents, collection):
        """Insert several documents with one ``insert_many`` call.

        :param documents: non-empty list of documents to insert.
        :param collection: name of the target collection.
        :return: None
        :raises TypeError: if *documents* is not a list or is empty.
        """
        if not isinstance(documents, list) or not documents:
            raise TypeError("参数必须是一个非空的列表")
        try:
            # One call for the whole batch instead of one call per document.
            # The default ordered insert stops at the first failing document,
            # as the previous per-item loop did.
            self.use_collection(collection).insert_many(documents)
        except Exception:
            logger.exception("insert_many into %r failed", collection)
        return None

    def query_one_data(self, query_parame, collection):
        """Return the first document matching *query_parame*.

        :param query_parame: filter dict.
        :param collection: name of the collection to search.
        :return: the matching document, or None when nothing matches or the
            query fails.
        :raises TypeError: if *query_parame* is not a dict.
        """
        if not isinstance(query_parame, dict):
            raise TypeError("查询参数必须为dict类型")
        try:
            return self.use_collection(collection=collection).find_one(query_parame)
        except Exception:
            logger.exception("find_one on %r failed", collection)
            return None

    def query_all_data(self, collection, query_parame=None, limit_num=sys.maxsize):
        """Return the documents matching *query_parame* as a list.

        :param collection: name of the collection to search.
        :param query_parame: optional filter dict; None applies no filter.
        :param limit_num: maximum number of documents to return.
        :return: list of documents, or None when the query fails.
        :raises TypeError: if *query_parame* is given but is not a dict.
        """
        if query_parame is not None and not isinstance(query_parame, dict):
            raise TypeError("查询参数必须为dict类型")
        try:
            cursor = self.use_collection(collection).find(query_parame)
            # limit() caps the size of the result set.
            return list(cursor.limit(limit_num))
        except Exception:
            logger.exception("find on %r failed", collection)
            return None

    def update_collection(self, query_conditions, after_change, collection):
        """Apply ``$set`` with *after_change* to the first matching document.

        :param query_conditions: filter dict selecting the document.
        :param after_change: dict of fields and their new values.
        :param collection: name of the collection to update.
        :return: None
        :raises TypeError: if either dict argument is not a dict.
        """
        if not isinstance(query_conditions, dict) or not isinstance(after_change, dict):
            raise TypeError("参数必须为dict类型")
        try:
            result = self.use_collection(collection).update_one(
                query_conditions, {"$set": after_change}
            )
        except Exception:
            logger.exception("update_one on %r failed", collection)
            return None
        # The update result already says whether anything matched, so no
        # separate find_one round trip is needed before updating.
        if result.matched_count == 0:
            print("查询条件不存在")
        return None

    def delete_collection(self, search, collection):
        """Delete the first document matching *search*.

        :param search: filter dict.
        :param collection: name of the collection to delete from.
        :return: None
        :raises TypeError: if *search* is not a dict.
        """
        if not isinstance(search, dict):
            raise TypeError("参数必须为dict类型")
        try:
            self.use_collection(collection).delete_one(search)
        except Exception:
            logger.exception("delete_one on %r failed", collection)

    def delete_many_collection(self, search, collecton):
        """Delete every document matching *search*.

        :param search: filter dict.
        :param collecton: name of the collection to delete from (the
            misspelled parameter name is kept so keyword callers still work).
        :return: None
        :raises TypeError: if *search* is not a dict.
        """
        # Same argument check as delete_collection, so a bad filter is
        # reported instead of being silently ignored.
        if not isinstance(search, dict):
            raise TypeError("参数必须为dict类型")
        try:
            self.use_collection(collecton).delete_many(search)
        except Exception:
            logger.exception("delete_many on %r failed", collecton)
        return None

    def drop_collection(self, collection):
        """Drop the collection named *collection*.

        :param collection: name of the collection to drop.
        :return: None
        """
        try:
            self.use_collection(collection).drop()
            print("delete success")
        except Exception:
            logger.exception("drop of %r failed", collection)
        return None