python写的,有点冗余,先码出来~~~~
这是data_stored.py的代码
1 # -*- coding:utf-8 -*- 2 # 存数据到mysql (只存了时间数字) 3 import pymysql 4 import csv 5 import datetime 6 import settings 7 from mysql import db 8 import os,time 9 import pandas as pd 10 import numpy as np 11 import threading 12 from apscheduler.schedulers.blocking import BlockingScheduler 13 from datetime import date 14 import csv 15 import utils 16 # filename = 'may_origin.csv' 17 # tablename = 'origin_data' 18 class data_stored(object): 19 def __init__(self): 20 pass 21 22 23 def data_stored(self,filename,tablename): 24 25 csvfile = open(filename,'r') 26 dict_reader = csv.DictReader(csvfile) 27 db.connect() 28 datas = [] 29 freq = 0 30 31 for row in dict_reader: 32 row = dict(row) 33 if row['']: 34 del(row['']) 35 #创建表 36 columns =[] 37 for i in row.keys(): 38 columns.append(i) 39 if (db.is_table_exist(tablename=tablename,dbname=settings.database) ==None and freq ==0 ): 40 db.create_table(tablename=tablename,columns=columns) 41 freq += 1 42 print("create is ok") 43 else: 44 pass 45 #插入数据 46 row['time'] = int(time.mktime(time.strptime(row['time'], '%Y-%m-%d %H:%M:%S'))) 47 datas.append(row) 48 db.insert_mysql_with_json(tablename, datas) 49 print("insert is ok") 50 db.disconnect() 51 52 def data_to_csv(self,filename,tablename,starttime,endtime,readfile=None,sep=None): 53 54 db.connect() 55 fieldNames, results = db.find(tablename, starttime, endtime) 56 57 58 data = [] 59 60 for fn in fieldNames: 61 data.append(fn) 62 #文件不存在 63 if readfile == None: 64 csvfile = open(filename, 'w') 65 writer = csv.writer(csvfile, dialect=("excel")) 66 # 插入列名 67 data_1 = [] 68 data_1 = sorted(set(data), key=data.index) 69 writer.writerow(data_1) 70 #文件存在 71 else: 72 csvfile = open(filename, 'a') 73 writer = csv.writer(csvfile, dialect=("excel")) 74 75 # 插入data 76 for info in results: 77 data_2 = [] 78 # for m_2 in info: 79 # data_2.append(m_2) 80 writer.writerow(info) 81 csvfile.close() 82 db.disconnect() 83 # # 84 # if __name__ == "__main__": 85 # filename='may_origin.csv' 86 # filename2='csvtest_05.csv' 87 # tablename = 'originData' 88 # st = 1462032004 89 # et = 1462032007 90 # 91 # t = data_stored() 92 # starttime = datetime.datetime.now() 93 # 94 # # t.data_stored(filename,tablename) 95 # t.data_to_csv(filename2,tablename, st,et) 96 # endtime =datetime.datetime.now() 97 # 98 # print(endtime-starttime) 99 ds = data_stored()
其中调用了mysql.py的代码就不码了,就是一些连接、断开数据库connect()、disconnect(),创建数据库create_table(),判断tablename是否存在is_table_exist,以及查列名find_columns(),查找数据find(),还有插入数据库数据insert_into_mysql()。
1 import settings 2 from mysql import db 3 import os 4 import csv 5 import utils 6 7 8 def time_main(start_time, end_time, tablename, columns=None): 9 timespan = settings.timespan 10 output_filename = 'sfhd_' + '_origin_' + utils.getDigitDay(start_time) + '.csv' 11 db.connect() 12 # 判断输出文件是否存在 :False为不存在 13 if os.path.isfile(output_filename) == False: 14 pass 15 else: 16 end_time = start_time 17 start_time = start_time -86400 18 output_filename = 'sfhd_' + '_origin_' + utils.getDigitDay(start_time) + '.csv' 19 20 21 # 隔一个时间段timespan存一次 22 with open(output_filename, 'w') as csvfile: 23 24 if columns == None: 25 columns = db.find_columns(tablename) 26 data = list(columns) 27 28 writer = csv.writer(csvfile, dialect=("excel")) 29 data_1 = sorted(set(data), key=data.index) 30 writer.writerow(data_1) 31 32 temp_time = start_time + timespan 33 current_time = start_time 34 35 while temp_time <= end_time+3: 36 utils.log_easy('time_main', utils.getTimeDes(temp_time)) 37 fieldNames, results = db.find(tablename, current_time, temp_time-1, columns) 38 # 插入data 39 for info in results: 40 writer.writerow(info) 41 current_time = temp_time 42 temp_time = current_time + timespan 43 db.disconnect()
这是简化过后的mysql数据存到CSV文件中。
常见的mode取值组合
r或rt 默认模式,文本模式读 rb 二进制文件 w或wt 文本模式写,打开前文件存储被清空 wb 二进制写,文件存储同样被清空 a 追加模式,只能写在文件末尾 a+ 可读写模式,写只能写在文件末尾 w+ 可读写,与a+的区别是要清空文件内容 r+ 可读写,与a+的区别是可以写到文件任何位置
在选择读文件的方式时,我发现 with open 打开文件并存数据,要比直接open打开存数据要快。因为前者可以在存好数据后就自动关闭文件,并且可以很好处理上下文的异常,还有清理工作。