没什么技术含量,直接贴代码:
# -*- coding: utf-8 -*-
"""Import the CSDN account dump into the ``csdnuser`` model.

Meant to be loaded from an environment in which ``csdn.models`` is importable
(the author runs it from a Django shell).
"""
from csdn.models import csdnuser


def d():
    """Read the dump line by line and save one ``csdnuser`` row per line.

    Every line is decoded as GB18030 and split on whitespace; fields 0, 2 and
    4 become ``username``, ``userpass`` and ``useremail`` (the odd fields are
    presumably separator tokens -- confirm against the dump format).

    A line that cannot be decoded or has fewer than five fields is reported
    on stdout and a fixed placeholder row is saved in its place, so the number
    of saved rows always equals the number of lines read.

    Progress is printed every 10000 rows and the total once the file is done.
    """
    imported = 0

    # Binary mode: each line is decoded explicitly below, so the same code
    # works whether the runtime hands back byte strings or text.
    # The ``with`` block guarantees the handle is closed even if save() raises.
    with open('E:\\sync\\docments\\csdn.sql', 'rb') as source:
        for index, raw_line in enumerate(source):
            try:
                # No manual quote doubling here: the values are handed to the
                # model as plain attributes, and save() presumably goes through
                # the ORM's parameterised queries, so a doubled "'" would be
                # stored literally.
                fields = raw_line.decode('gb18030').split()
                user = csdnuser(username=fields[0],
                                userpass=fields[2],
                                useremail=fields[4])
            except (UnicodeDecodeError, IndexError):
                # Undecodable or too-short line: keep going with a placeholder
                # row and show the offending line for later inspection.
                user = csdnuser(username=u'qtsharp',
                                userpass=u' ',
                                useremail=u'qtsharp@qq.com')
                print('wrong: %s\n' % index)
                print(raw_line)

            user.save()

            imported = index + 1
            if imported % 10000 == 0:
                print(u'已导入%s万个' % (imported // 10000))

            # NOTE(review): the steadily growing memory is presumably Django's
            # query log -- with settings.DEBUG = True every executed query is
            # appended to django.db.connection.queries.  Confirm the setting;
            # running with DEBUG = False, or calling django.db.reset_queries()
            # every few thousand rows, should keep memory flat.

    # ``imported`` stays 0 for an empty file instead of being undefined.
    print(u'导入完成,共导入%s个。' % imported)
发现了一个很严重的问题,内存占用一直很高,而且还在不断增长,应该怎样及时释放内存呢?
暂时想不到好的办法,先改成下面这样,然后设定处理数量,分段进行。
# -*- coding: utf-8 -*-
"""Import a slice of the CSDN account dump into the ``csdnuser`` model.

The work is split into ranges of lines so that a long import can be run in
several smaller passes: ``d(0, 100000)``, then ``d(100000, 200000)``, ...
"""
try:
    from csdn.models import csdnuser
except ImportError:
    # Best effort: only tell the user why the model is missing.  d() will
    # still fail with NameError until ``csdn.models`` can be imported.
    print(u'没有开启django shell')

file_path = 'E:\\sync\\docments\\csdn.sql'


def pass_lines(num, source):
    """Skip up to ``num`` lines of ``source`` by reading and discarding them.

    ``source`` is any object with a ``readline()`` method.  Skipping stops
    early when the end of the input is reached.
    """
    skipped = 0
    # readline() is used (not iteration) so that callers can keep calling
    # readline() on the same handle afterwards without losing buffered data.
    while skipped < num:
        if not source.readline():
            break
        skipped += 1


def d(begin_num, limit_num):
    """Save one ``csdnuser`` row for each dump line in [begin_num, limit_num).

    Line numbers are 0-based.  Every line is decoded as GB18030 and split on
    whitespace; fields 0, 2 and 4 become ``username``, ``userpass`` and
    ``useremail``.  A line that cannot be decoded or has fewer than five
    fields is reported on stdout and a fixed placeholder row is saved instead.

    Processing stops at ``limit_num`` or at the end of the file, whichever
    comes first, and the number of lines actually processed is printed.
    """
    i = begin_num

    # Binary mode: each line is decoded explicitly below.  The ``with`` block
    # closes the handle even if save() raises.
    with open(file_path, 'rb') as source:
        # Skip the lines that belong to earlier passes.
        pass_lines(begin_num, source)

        # Write loop.  Stopping on an empty read replaces the old
        # "read the whole file just to count its lines" clamp, which pulled
        # the entire dump into memory.
        while i < limit_num:
            raw_line = source.readline()
            if not raw_line:
                break

            try:
                fields = raw_line.decode('gb18030').split()
                user = csdnuser(username=fields[0],
                                userpass=fields[2],
                                useremail=fields[4])
            except (UnicodeDecodeError, IndexError):
                user = csdnuser(username=u'qtsharp',
                                userpass=u' ',
                                useremail=u'qtsharp@qq.com')
                print('wrong: %s\n' % i)
                print(raw_line)

            # Commit the row.
            user.save()

            # Progress report every 10000 lines (absolute line number).
            n = i + 1
            if n % 10000 == 0:
                print(u'已导入%s万个' % (n // 10000))
            i += 1

            # NOTE(review): if memory still grows within one pass, the likely
            # cause is Django's query log (settings.DEBUG = True keeps every
            # query in django.db.connection.queries) -- confirm the setting.

    # Report what was really processed, not the requested range size.
    print(u'导入完成,共导入%s个' % (i - begin_num))