最近做一些数据库调研的工作,目标是实现影像更快的入库、出库、查询,并实现并行访问等操作。
将结果总结成一个mongoImg类,也算是小结吧。
'''
Created on 2013-8-6

mongoImg: bulk-load the files under a directory into MongoDB GridFS and
read them back by filename.

@author: tree
'''
__metaclass__ = type

import os
from pymongo.database import Database
import time
import gridfs


class mongoImg(object):
    """Store the files of a directory tree in GridFS and fetch them by name.

    Each file found under the configured directory is stored as one GridFS
    file whose ``filename`` is the path produced by joining the walked
    directory with the file name.
    """

    def __init__(self, database, dir):
        """Create a new instance bound to a database and a source directory.

        :Parameters:
          - `database`: :class:`pymongo.database.Database` to store files in
          - `dir`: root directory whose files will be inserted

        :Raises:
          - `TypeError`: if `database` is not a ``Database`` instance, or
            if `dir` is empty
        """
        if not isinstance(database, Database):
            raise TypeError("database must be an instance of Database")
        if len(dir) < 1:
            raise TypeError("dir must be an string of directory")

        self.__imgdb = database
        self.__imgfs = gridfs.GridFS(self.__imgdb)
        self.__dir = dir
        self.__filelist = []

    def __dirwalk(self, topdown=True):
        """Collect the path of every file under ``self.__dir``.

        The result replaces the previous contents of ``self.__filelist``;
        the number of files found is printed.

        :Parameters:
          - `topdown`: forwarded to :func:`os.walk`
        """
        # Empty the list in place; ``list.clear()`` is unavailable on
        # Python 2, while slice deletion works on both 2 and 3.
        del self.__filelist[:]

        for root, _dirs, files in os.walk(self.__dir, topdown):
            for name in files:
                self.__filelist.append(os.path.join(root, name))
        print(len(self.__filelist))

    def insert(self, content_type="jpg"):
        """Insert every file under the source directory into GridFS.

        Walks the directory, stores each file's bytes with its path as the
        GridFS ``filename``, and prints the elapsed time.

        :Parameters:
          - `content_type`: value stored as the GridFS ``content_type`` of
            every inserted file (defaults to ``"jpg"``)
        """
        self.__dirwalk()

        tStart = time.time()
        for path in self.__filelist:
            # Read as bytes: GridFS must receive the data itself, and binary
            # mode avoids any text decoding of the image content.
            with open(path, 'rb') as image_file:
                data = image_file.read()
            self.__imgfs.put(data, content_type=content_type, filename=path)

        tEnd = time.time()
        print("It cost %f sec" % (tEnd - tStart))

    def getbyname(self, filename, savepath):
        """Fetch a file from GridFS by filename and write it to `savepath`.

        :Parameters:
          - `filename`: GridFS filename to look up (via ``get_version``)
          - `savepath`: path of the local file to write

        :Raises:
          - `TypeError`: if `savepath` is empty
        """
        if len(savepath) < 1:
            raise TypeError("savepath must be a non-empty file path")
        dataout = self.__imgfs.get_version(filename)
        # ``with`` closes the file on every path and, unlike try/finally
        # around open(), does not raise NameError when open() itself fails.
        with open(savepath, 'wb') as imgout:
            imgout.write(dataout.read())
使用示例:也可以将数据库连接写在类内部
# Usage example: insert every file under a directory into the "imgdb" database.
# MongoClient replaces the legacy Connection class, which was removed in
# PyMongo 3.0; with no arguments both connect to the default local server.
from pymongo import MongoClient
import mongoImg

# NOTE(review): this was probably r'D:\image' before the backslash was lost
# in extraction; as written it is a drive-relative path -- confirm.
filedir = r'D:image'
con = MongoClient()
db = con.imgdb
imgmongo = mongoImg.mongoImg(db, filedir)
imgmongo.insert()
感觉mongodb存储影像切片还是蛮快的,1w多个图片,大约100-200秒左右。
tip:
gridfs.GridFS.put 函数
put(data, **kwargs): Put data in GridFS as a new file. Equivalent to doing:
    try:
        f = new_file(**kwargs)
        f.write(data)
    finally:
        f.close()
在存储读取图像时,犯了低级错误,将open得到的file实例当做数据存储,读取的时候怎么也读不出数据。。。囧
另外以字节流形式读取图像数据比较适合。
pipe = open('/dev/input/js0','rb')
如果以str形式存储的话,可能会出现UnicodeDecodeError错误,这是因为图像的二进制数据中包含无法按python默认编码解码的字节,所以应当以'rb'模式读取并直接存储字节数据。
ps:初学python 数据库操作也忘得差不多 欢迎大家批评和指正~