• CSV 文件批量导入 数据库 Pandas


    Pandas
    import pandas as pd
    from sqlalchemy import create_engine
    from sqlalchemy.engine import Engine
    import traceback
    import math
    
    class MSSQL:
    	host = "10.251.21.1XXX"
    	port = 3306
    	dbname = "AnyXXXX"
    	user = "root"
    	passwd = "AXXXXXX"
    
    def connect_AS_DB():
    	try:
    		conn = create_engine ('mysql+pymysql://{user}:{password}@{host}:{port}/{dbname}'.format(user=MSSQL.user,
    																					password=MSSQL.passwd,
    																					host=MSSQL.host,
    																					port=int(MSSQL.port),
    																					dbname=MSSQL.dbname),echo = False)
    		return conn
    
    	except Exception as e:
    		print("Connect to MSSQL exception:%s"% traceback.format_exc())
    		return None
    
    
    if __name__ == '__main__':
    	#定义每次分割10行
    	lines=10 
    	try:
    		conn =connect_AS_DB()
    	except Exception as E:
    		print(E)
    	
    	df= pd.read_csv(r'C:\LocalTemp\Python 拆分大文件\操作日志.2022-4-9~2022-4-19.csv',iterator=False, error_bad_lines=True,encoding='utf-8',names=['DateTime', 'UserName', 'Catelog', 'Action', 'IP', 'Unknown','Record', 'Path', 'Client', 'obj_id'] )
    	# print(DataInfo_iterator["DateTime"])
    
    
    	for i in range(0, math.ceil(df.shape[0]/lines)):
    		DataInfo = df.iloc[i*lines:(i+1)*lines]
    		if conn:
    			try:
    				print(DataInfo["DateTime"])
    				print('*'*50+str(i)+'*'*50)
    				# DataInfo.to_sql('ASLog', conn, if_exists="append", index=False)
    
    			except Exception as e:
    				print(e)
    		else :
    			print("connect to sql error")
    
    
    
    

    版本做了优化 支持多个文件的导入,同时解决第九列文件内含有“,”的问题

    点击查看代码
    import pandas as pd
    from sqlalchemy import create_engine
    from sqlalchemy.engine import Engine
    import traceback
    import math
    
    class MSSQL:
    	host = "10.251.21.XXX"
    	port = 3306
    	dbname = "Anyshare"
    	user = "root"
    	passwd = "AlXXX"
    
    #链接Anyshare 日志数据库
    def connect_AS_DB():
    	try:
    		conn = create_engine ('mysql+pymysql://{user}:{password}@{host}:{port}/{dbname}'.format(user=MSSQL.user,
    																					password=MSSQL.passwd,
    																					host=MSSQL.host,
    																					port=int(MSSQL.port),
    																					dbname=MSSQL.dbname),echo = False)
    		return conn
    	except Exception as e:
    		print("Connect to MSSQL exception:%s"% traceback.format_exc())
    		return None
    
    
    
    
    def Writeto_Sql(arr_Lists,columns_Num):
    	for arr in arr_Lists:
    		print(arr[0])
    		df=pd.DataFrame(arr,columns=columns_Num )
    		df.to_sql('ASLog', conn, if_exists="append", index=False)
    		# print(df)
    
    
    
    #将以迭代返回列表的形式返回数据
    def get_CleanFileInfo(File_list):
    	for file_Name in File_list:
    		
    		with open(r'C:\LocalTemp\Python 拆分大文件\{file}'.format(file=file_Name),"r", encoding="utf-8") as f:
    			lines=f.readlines()
    			i=0
    		arr_List=[]
    		for line in lines:
    			i+=1
    			
    			vals=line.split(',')
    			if len(vals)==10:
    				arr_List.append(vals)
    				# arr_List=vals
    			elif len(vals)>10:
    				arr_List.append(vals[:8]+[','.join(vals[8:-1])]+vals[-1:])
    			if i % 100==0: #数据库commit 需要消耗时间,将这里调整从1 调整到100后明显发现CPU 、内存消耗更多,速度也更快
    				yield(arr_List)
    				arr_List=[]
    		yield(arr_List)
    
    
    
    if __name__ == '__main__':
    	try:
    		conn =connect_AS_DB()
    	except Exception as E:
    		print(E)
    	#数据有效列数
    	columns=['DateTime', 'UserName', 'Catelog', 'Action', 'IP', 'MAC','Record', 'Path', 'Client', 'obj_id']
    	#待导入文件列表
    	File_list=["操作日志.2022-1.csv","操作日志.2022-2.csv","操作日志.2022-3.csv","操作日志.2022-4-1~2022-4-3.csv","操作日志.2022-4-3~2022-4-9.csv","操作日志.2022-4-9~2022-4-19.csv"]
    
    
    	#获取返回后的文件信息列表,返回迭代器
    	infoline_iter=get_CleanFileInfo(File_list)
    	#将获取的信息写入数据库
    	Writeto_Sql(infoline_iter,columns)
    
  • 相关阅读:
    动态加载JS脚本【转】
    定义并且立即执行JS匿名函数拾遗
    javascript操作ASCII码与字符对转
    win7的mklink命令
    [Yii Framework] How to get the current static page name?
    [Ubuntu] 利用Ubuntu光盘破解win7用户登录 Crark the win7 user via Ubuntu live CD
    [Ubuntu] reload the .bashrc file without logout nor restart.
    [Ubuntu] the permissions of lampp mysql and phpmyadmin
    [Zend PHP5 Cerification] Some note when studying
    [eZ publish] How to modify the $view_parameters valus in the template.
  • 原文地址:https://www.cnblogs.com/vmsky/p/16173237.html
Copyright © 2020-2023  润新知