# -*- coding:utf-8 -*-
"""Turn an EDSD text file into a CSV of tag, name, function and note.

Adapted to the new version of the input file layout.

Three steps are run in order by the ``__main__`` guard:

1. ``trsd_nonote`` writes one ``TAG,name,"function text"`` row per header.
2. ``trsd_note`` writes one line of note text per header.
3. ``join`` pairs the two outputs line by line into the final CSV.
"""
import re


year = '17A'  # user-defined release suffix
ss = './data/'  # root directory for input and output files
filename = ss + 'EDSD%s.txt' % year  # input file name

# NOTE(review): the escapes in the patterns below (\s, \w, \. and the
# trailing \n) were restored from a copy that had lost its backslashes --
# confirm them against a sample input file.

# Header line: captures the three-letter tag.
_ROW_TAG = re.compile(r"^\s{4}(?:X|W)\s{2}([A-Z]{3})\s\s.+\n")
# Header line: captures the text that follows the tag.
_ROW_NAME = re.compile(r"\s{4}(?:X|W)\s{2}[A-Z]{3}\s\s(.+)\n")
# Example of the "Function:" paragraph the next four patterns handle:
#        Function: To specify information regarding the transport
#                  such as mode of transport, means of transport,
#                  its conveyance reference number and the
#                  identification of the means of transport.
# "Function:" text that fits on one line (ends with a period).
_FUNC_ONE_LINE = re.compile(r"^\s{7}Function:\s(.+\w\w\.)\n")
# First line of a multi-line "Function:" text (ends with ".g." or no period).
_FUNC_FIRST = re.compile(r"^\s{7}Function:\s(.+\.g\.|.+[^.])\n")
# Continuation line that does not end the paragraph.
_FUNC_MIDDLE = re.compile(r"^\s{17}(\w.+[^.])\n")
# Continuation line that ends the paragraph with a period.
_FUNC_LAST = re.compile(r"^\s{17}(.+\.)\n")

# NOTE(review): this header pattern accepts a different layout from
# _ROW_TAG (flag in column 1 or no flag, instead of a flag in column 5), so
# the two intermediate files may not line up row for row -- confirm.
_NOTE_TAG = re.compile(r"^(?:\s{7}|X\s{6}|W\s{6})([A-Z]{3})\s\s[A-Z].+$")
_NOTE_HEAD = re.compile(r"^\s{7}Note:\s\n")  # the "Note:" marker line
_NOTE_BODY = re.compile(r"^\s{12}([^ ].+)\n")  # a line of note content
_NOTE_END = re.compile(r"^(?:-|컴)+\n")  # separator rule between entries


def _row_pieces(lines):
    """Yield the text fragments that ``trsd_nonote`` appends for *lines*.

    The checks are deliberately sequential ``if`` statements rather than an
    ``elif`` chain: a header line matches both the tag and the name pattern
    and must advance the state twice within one iteration.
    """
    state = 0
    for line in lines:
        tags = _ROW_TAG.findall(line)
        names = _ROW_NAME.findall(line)
        one_line = _FUNC_ONE_LINE.findall(line)
        first = _FUNC_FIRST.findall(line)
        middle = _FUNC_MIDDLE.findall(line)
        last = _FUNC_LAST.findall(line)

        if tags:
            state = 1
            yield "\n"  # every header starts a new output row
            for piece in tags:
                yield piece
        if names and state == 1:
            state = 2
            yield ","
            for piece in names:
                yield piece
        if one_line and state == 2:
            state = 3
            # Wrapped in double quotes because the text may contain commas.
            yield ",\""
            for piece in one_line:
                yield piece
            yield "\""
        if first and state == 2:
            state = 4
            yield ",\""
            for piece in first:
                yield piece
        # Was `flag == 4 or 5`, which is always true and let any indented
        # line leak into the row after the quoted field had been closed.
        if middle and state in (4, 5):
            state = 5
            yield " "
            for piece in middle:
                yield piece
        if last and state in (4, 5):
            state = 6
            yield " "
            for piece in last:
                yield piece
            yield "\""


def _note_pieces(lines):
    """Yield the text fragments that ``trsd_note`` appends for *lines*."""
    state = 0
    in_note = False
    for line in lines:
        if _NOTE_TAG.findall(line):
            state = 1
            yield "\n"  # one output line per header, even without a note
        if _NOTE_HEAD.findall(line) and state == 1:
            state = 2
            in_note = True
        if in_note:
            body = _NOTE_BODY.findall(line)
            # Was `flag == 2 or 3`, which is always true and kept collecting
            # text after a new header that had no "Note:" line of its own.
            if body and state in (2, 3):
                state = 3
                yield " "
                for piece in body:
                    yield piece
        if _NOTE_END.findall(line) and state == 3:
            state = 0
            in_note = False
            # NOTE(review): kept as a single space; the original literal may
            # have been a newline -- confirm.
            yield " "


def trsd_nonote(src=None, dest=None):
    """Append tag, name and function text of every header to a text file.

    Args:
        src: Path of the input file. Defaults to the module-level
            ``filename``.
        dest: Path of the output file, opened in append mode. Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.

    Each header starts a new output line of the form
    ``TAG,name,"function text"``; a multi-line function text is joined with
    single spaces.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'trsd_nonote%s.txt' % year
    # 'a' appends, 'w' would overwrite. The output used to be re-opened for
    # every input line and the input was never closed.
    with open(src) as reader, open(dest, 'a') as writer:
        writer.writelines(_row_pieces(reader))


def trsd_note(src=None, dest=None):
    """Append the note text of every header to a text file.

    Args:
        src: Path of the input file. Defaults to the module-level
            ``filename``.
        dest: Path of the output file, opened in append mode. Defaults to
            ``ss + 'trsd_note%s.txt' % year``.

    Each header starts a new output line; the lines of its note, if any,
    follow on that same line, each preceded by a space.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'trsd_note%s.txt' % year
    with open(src) as reader, open(dest, 'a') as writer:
        writer.writelines(_note_pieces(reader))


def join(note_path=None, nonote_path=None, csv_path=None):
    """Pair the two intermediate files line by line into the final CSV.

    Args:
        note_path: Output of ``trsd_note``. Defaults to
            ``ss + 'trsd_note%s.txt' % year``.
        nonote_path: Output of ``trsd_nonote``. Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.
        csv_path: CSV file to append to. Defaults to
            ``ss + 'trsd%s.csv' % year``.

    Every line of the row file becomes ``row,"note"``. Whitespace-only note
    lines become an empty note, and rows beyond the end of the note file
    also get an empty note instead of raising ``IndexError``.
    """
    if note_path is None:
        note_path = ss + 'trsd_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'trsd_nonote%s.txt' % year
    if csv_path is None:
        csv_path = ss + 'trsd%s.csv' % year

    with open(note_path) as note_file:
        notes = iter(['' if text.isspace() else text for text in note_file])

    with open(nonote_path) as rows, open(csv_path, 'a') as out:
        for row in rows:
            note = next(notes, '')
            out.write("%s,\"%s\"\n" % (row.strip('\n'), note.strip('\n')))


if __name__ == '__main__':
    trsd_nonote()
    trsd_note()
    join()