1 #!/usr/local/bin/python
2 #coding=UTF-8
3
4 import os
5 import csv as csv
6 import re
7 import shutil
8 import os.path
9 import datetime
10 import time 12 import gzip
13
14
# Raise the csv module's maximum allowed field size to 1,000,000 so that
# very long fields do not make csv.reader raise "field larger than field limit".
csv.field_size_limit(1000000)
16
17
18
19
20
21
def ztetd15data(timestr, granulityPeriods, oridir, outputdir):
    """Write gzip copies of the XML files under ``oridir``, renamed to a new period.

    ``oridir`` is walked recursively.  For every file whose name ends in
    ``.xml`` the first 22 characters and the part between index 59 and the
    extension are kept, and the text in between is replaced by
    ``<start>+0800-<end>+0800``, where ``<start>`` is ``timestr`` and
    ``<end>`` is ``timestr`` plus ``granulityPeriods`` minutes, both formatted
    as ``%Y%m%d.%H%M``.  The result is written to ``outputdir`` as
    ``<new name>.xml.gz``; the source files are left untouched.

    Args:
        timestr: Start time of the data file to create, formatted
            ``'%Y-%m-%d %H:%M:%S'``.
        granulityPeriods: Time granularity in minutes.
        oridir: Directory holding the original files, whose names look like
            ``PM201310271604+080024A20131027.1545+0800-20131027.1600+0800_101_Carrier_-_1.xml``.
        outputdir: Output directory, for example
            ``'D:/OMC_DATA/HW/GSM/OMC1/TIL-HBSC-020/PM/DECODE/'``.  Surrounding
            whitespace is stripped and the directory is created if missing.

    Raises:
        ValueError: If ``timestr`` does not match the expected format.
    """
    # Fixed offsets into the source file name (see the example in ``oridir``).
    prefix_len = 22      # everything before the old period stamp
    suffix_start = 59    # first character after the old period stamp

    outpath = outputdir.strip()
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    period_start = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
    period_end = period_start + datetime.timedelta(minutes=granulityPeriods)
    newfilenametime = (period_start.strftime('%Y%m%d.%H%M') + "+0800-"
                       + period_end.strftime('%Y%m%d.%H%M') + "+0800")

    for root, dirs, files in os.walk(oridir):
        for filename in files:
            # Plain suffix test: the former regex ".xml$" had an unescaped dot
            # and therefore also matched names such as "notes_xml".
            if not filename.endswith('.xml'):
                continue

            inputpath = os.path.join(root, filename)
            print(inputpath)
            print(filename)

            fileprefix = filename[0:prefix_len]
            filesuffix = filename[suffix_start:-4]
            print(fileprefix)
            print(filesuffix)

            outfile = fileprefix + newfilenametime + filesuffix
            print(outfile)
            # Use the stripped path, i.e. the directory that was created above.
            outputfile = os.path.join(outpath, outfile + ".xml")
            print(outputfile)

            # Compress straight from the source file; the intermediate
            # uncompressed copy was only ever created to be deleted again.
            targetname = outputfile + ".gz"
            with open(inputpath, 'rb') as f_in:
                with gzip.open(targetname, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
93
94
95
96
97
98
99
100
def readpath(oripath):
    """Read the path configuration file and return its source/target pairs.

    Each non-blank line of ``oripath`` has the form ``<source>,<target>``.
    The target directory of every line is created if it does not exist yet.

    Args:
        oripath: Path of the comma-separated configuration file.

    Returns:
        A list of dicts, one per line, with the keys ``"ori"`` (first field)
        and ``"target"`` (second field).

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    pathlist = []

    # The context manager closes the file even when a line is malformed.
    with open(oripath, 'r') as fp:
        for line in fp:
            print(line)
            # Drop the line terminator, including the "\r" of CRLF files.
            line = line.strip('\r\n')
            if not line.strip():
                # A blank (e.g. trailing) line carries no path pair.
                continue
            paths = line.split(',')
            target = paths[1]
            if not os.path.exists(target):
                os.makedirs(target)
            pathlist.append({"ori": paths[0], "target": target})

    return pathlist
120
121
122
123
124
125
126
127
128
129
# Configuration file passed to readpath() by runBySystemTime(); each line
# holds one "<source dir>,<target dir>" pair.
tdoripath = "/tomcat/***/CREATE_DATA/omcdatapathTDPM.csv"
131
132
133
def runBySystemTime():
    """Run ztetd15data once for every configured path pair, using the current time.

    The period start is the current local time, the granularity is fixed at
    15 minutes, and the path pairs are read from the file named by the
    module-level ``tdoripath``.  The elapsed time in seconds is printed at
    the end.
    """
    # time.time() instead of time.clock(): clock() was removed in Python 3.8
    # and measured CPU time rather than elapsed time on Unix.
    start = time.time()

    timestr = time.strftime("%Y-%m-%d %H:%M:%S")
    granulityPeriods = 15

    for path in readpath(tdoripath):
        ztetd15data(timestr, granulityPeriods, path.get("ori"), path.get("target"))

    finish = time.time()

    print("finished,couse:")
    print(finish - start)
158 #time.sleep(60*60)
159
160
161
def runByDuration(begintime, endtime, durantion):
    """Run ztegsm60data for every step between ``begintime`` and ``endtime``.

    Starting at ``begintime`` and advancing by ``durantion`` minutes while the
    current time is not later than ``endtime``, the path configuration is
    re-read and ztegsm60data is called for every path pair with the current
    step as period start.

    Args:
        begintime: First period start, formatted ``'%Y-%m-%d %H:%M:%S'``.
        endtime: Last period start (inclusive), in the same format.
        durantion: Step between two period starts, in minutes.

    Raises:
        ValueError: If a timestamp does not match the format or if
            ``durantion`` is not positive (the loop would never terminate).
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    print(begintime)

    if durantion <= 0:
        raise ValueError("durantion must be a positive number of minutes")

    # Compare real datetimes instead of relying on string ordering.
    current = datetime.datetime.strptime(begintime, time_format)
    last = datetime.datetime.strptime(endtime, time_format)
    step = datetime.timedelta(minutes=durantion)

    while current <= last:
        # Use the advancing loop time.  The previous code read the module
        # global ``begin_time`` here, so every pass used the same timestamp.
        timestr = current.strftime(time_format)

        # NOTE(review): ``oripath`` and ``granulityPeriods`` are module-level
        # names; the only visible assignment to ``oripath`` is commented out,
        # so it must be defined before this function is called.
        pathlist = readpath(oripath)

        for path in pathlist:
            ztegsm60data(timestr, granulityPeriods, path.get("ori"), path.get("target"))

        current += step

    print("end of while")
183
184
185
186
# Granularity in minutes; read as a module-level name by runByDuration().
granulityPeriods = 60
# NOTE(review): runByDuration() reads a module-level ``oripath``; its only
# visible assignment is the commented-out line below.
#oripath = "C:/Users/Administrator/Desktop/python_wff/omcdatapathTDPM.csv"
# Start of the processing window.
begin_time = '2013-02-16 01:00:00'
# End of the processing window.
end_time = '2013-02-17 01:00:00'

# Run mode: 1 replays the window above in 15-minute steps,
# 2 runs once for the current system time.
model = 2

if model == 1:
    runByDuration(begin_time, end_time, 15)
elif model == 2:
    runBySystemTime()
else:
    print("wrong args!")
203
204
# 读取csv文件的代码和上面类似。 (The code that handles the CSV files is similar to the code above.)
def _open_csv(path, mode):
    """Open ``path`` in the mode the csv module expects on the running Python."""
    import sys

    if sys.version_info[0] < 3:
        # Python 2: the csv module works on byte strings.
        return open(path, mode + 'b')
    # Python 3: text mode with newline='' as the csv documentation requires.
    # latin-1 maps every byte to exactly one code point, so the untouched
    # columns are written back byte-for-byte whatever their real encoding is.
    return open(path, mode, newline='', encoding='latin-1')


def ztegsm60data(timestr, granulityPeriods, oridir, outputdir):
    """Copy the CSV files under ``oridir`` to ``outputdir`` for a new period.

    ``oridir`` is walked recursively.  For every file whose name ends in
    ``.csv`` the last 33 characters of the name are replaced by
    ``<start>-<end>.csv``, where ``<start>`` is ``timestr`` and ``<end>`` is
    ``timestr`` plus ``granulityPeriods`` minutes, both formatted as
    ``%d%b%Y_%H%M``.  The first row is copied unchanged; in every following
    non-empty row the first column is overwritten with ``<end>`` formatted as
    ``%Y%m%d%H%M``.

    Args:
        timestr: Period start, formatted ``'%Y-%m-%d %H:%M:%S'``.
        granulityPeriods: Period length in minutes.
        oridir: Directory holding the original CSV files.
        outputdir: Output directory; surrounding whitespace is stripped and
            the directory is created if missing.

    Raises:
        ValueError: If ``timestr`` does not match the expected format.
    """
    # Length of the tail that is cut from each source name.  The old code
    # cut 34 characters from the name plus one appended space, i.e. 33.
    old_stamp_len = 33

    outpath = outputdir.strip()
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    period_start = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
    period_end = period_start + datetime.timedelta(minutes=granulityPeriods)

    # Period stamp used in the new file name.
    newfilenametime = (period_start.strftime('%d%b%Y_%H%M') + '-'
                       + period_end.strftime('%d%b%Y_%H%M'))
    # Value written into the first column of every data row.
    modifycollecttime = period_end.strftime('%Y%m%d%H%M')

    for root, dirs, files in os.walk(oridir):
        for filename in files:
            # Plain suffix test: the former regex ".csv$" had an unescaped dot.
            if not filename.endswith('.csv'):
                continue

            inputpath = os.path.join(root, filename)
            fileprefix = filename[:-old_stamp_len]
            outputfile = os.path.join(outpath, fileprefix + newfilenametime + ".csv")

            print(inputpath)
            # Context managers flush and close both files, even on errors.
            with _open_csv(inputpath, 'r') as src:
                with _open_csv(outputfile, 'w') as dst:
                    reader = csv.reader(src)
                    writer = csv.writer(dst)

                    # First row holds the field names; an empty input file
                    # simply yields an empty output file.
                    header = next(reader, None)
                    if header is not None:
                        writer.writerow(header)
                    print(outputfile)

                    # Rewrite the time column of every data row.
                    for row in reader:
                        if row:
                            row[0] = modifycollecttime
                        writer.writerow(row)
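# 最难的是字符串到datetime的类型转换。还没有搞懂,只是在那试啊试啊,怎么走的通怎么走。 (The hardest part was converting strings to datetime. I have not fully understood it yet; I simply kept trying until something worked.)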