• trsd_extract_EDSD_new


      1 # -*- coding:utf-8 -*-
      2 import re
      3 
      4 
      5 '''
      6 适应新版本
      7 '''
      8 
      9 
     10 year='17A'#用户自定义
     11 ss='./data/'#根目录
     12 filename = ss+'EDSD%s.txt'%year#输入文件名
     13 
     14 
     15 
     16 
     17 def trsd_nonote():
     18 
     19 
     20     p1 = r"^s{4}(?:X|W)s{2}([A-Z]{3})ss.+
    "#TCC
     21     p2 = r"s{4}(?:X|W)s{2}[A-Z]{3}ss(.+)
    "
     22     """
     23            Function: To specify information regarding the transport
     24                      such as mode of transport, means of transport,
     25                      its conveyance reference number and the
     26                      identification of the means of transport.
     27     """
     28     p3 = r"^s{7}Function:s(.+ww.)
    "
     29     p4 = r"^s{7}Function:s(.+.g.|.+[^.])
    "
     30     # p4 = r"^s{7}Function:s(.+[.g.|[^.]])
    "
     31     p5 = r"^s{17}(w.+[^.])
    "
     32     p6 = r"^s{17}(.+.)
    "
     33 
     34     #Note
     35     # p7 = r"^s{7}Note:s
    "#Note
     36     # p8= r"^s{12}([A-Z].+.)
    "#Note内容只有1行
     37     # p9 = r"^s{12}(.+[^.]|)
    "#Note内容只多行的非最后行
     38     # p10 = r"^s{12}(.+.)
    "#Note内容只多行的最后行
     39 
     40     pattern1 = re.compile(p1)
     41     pattern2 = re.compile(p2)
     42     pattern3 = re.compile(p3)
     43     pattern4 = re.compile(p4)
     44     pattern5 = re.compile(p5)
     45     pattern6 = re.compile(p6)
     46     fr = open(filename)
     47     # temp = "";
     48     flag = 0
     49     for line in fr.readlines():
     50         matcher1 = re.findall(pattern1,line)
     51         matcher2 = re.findall(pattern2,line)
     52         matcher3 = re.findall(pattern3,line)
     53         matcher4 = re.findall(pattern4,line)
     54         matcher5 = re.findall(pattern5,line)
     55         matcher6 = re.findall(pattern6,line)
     56         #print matcher
     57         w2 = open(ss+'trsd_nonote%s.txt'%year,'a')#a代表追加 w代表重写
     58         if matcher1:
     59             flag = 1
     60             w2.write("
    ")
     61             for j in matcher1:
     62                 # for k in j:
     63                     w2.write(j)
     64         if ((matcher2!=[])and(flag ==1)):
     65             flag = 2
     66             w2.write(",")
     67             for j in matcher2:
     68                 # for k in j:
     69                     w2.write(j)
     70         if ((matcher3!=[])and(flag ==2)):
     71             flag = 3
     72             #防止有逗号,用双引号括起
     73             w2.write(","")
     74             for j in matcher3:
     75                 # for k in j:
     76                     w2.write(j)
     77             w2.write(""")
     78         if ((matcher4!=[])and(flag ==2)):
     79             flag = 4
     80             w2.write(","")
     81             for j in matcher4:
     82                 # for k in j:
     83                     w2.write(j)
     84         if ((matcher5!=[])and(flag ==4 or 5)):
     85             flag = 5
     86             w2.write(" ")
     87             for j in matcher5:
     88                 # for k in j:
     89                     w2.write(j)
     90             # w2.write(""")
     91         if ((matcher6!=[])and(flag ==4 or flag==5)):
     92             flag = 6
     93             w2.write(" ")
     94             for j in matcher6:
     95                 # for k in j:
     96                     w2.write(j)
     97             w2.write(""")
     98     w2.close( )
     99 
    100 def trsd_note():
    101 
    102     p1 = r"^(?:s{7}|Xs{6}|Ws{6})([A-Z]{3})ss[A-Z].+$"#匹配1001
    103     p2 = r"^s{7}Note:s
    "#Note
    104     p3= r"^s{12}([^ ].+)
    "#Note内容
    105     p4= r"^(?:-|컴)+
    "
    106     pattern1 = re.compile(p1)
    107     pattern2 = re.compile(p2)
    108     pattern3 = re.compile(p3)
    109     pattern4 = re.compile(p4)
    110 
    111 
    112     fr = open(filename)
    113     w2 = open(ss+'trsd_note%s.txt'%year,'a')#a代表追加 w代表重写
    114     # temp = ();
    115     flag = 0
    116     flag1=0
    117     for line in fr.readlines():
    118         matcher1 = re.findall(pattern1,line)
    119         matcher2 = re.findall(pattern2,line)
    120         matcher3 = re.findall(pattern3,line)
    121         matcher4 = re.findall(pattern4,line)
    122 
    123        
    124         #print matcher
    125 
    126         if matcher1!=[]:
    127             flag = 1
    128             w2.write("
    ")
    129             # for j in matcher1:
    130                 
    131             #     w2.write(j)
    132 
    133         if ((matcher2!=[])and(flag == 1)):
    134             flag = 2
    135             flag1=1
    136             # w2.write(",")
    137         if flag1==1:
    138             if ((matcher3!=[])and(flag ==2 or 3)):
    139                 flag = 3
    140                 w2.write(" ")
    141                 for j in matcher3:
    142                     
    143                     w2.write(j)
    144             # w2.write(")
    145             if ((matcher4!=[])and(flag == 3)):
    146                 flag=0
    147                 flag1=0
    148     w2.write("
    ")
    149     w2.close( )
    150     fr.close()
    151 
    152 def join():
    153 
    154 
    155 
    156     f1= open(ss+'trsd_note%s.txt'%year)
    157     f2 =open(ss+'trsd_nonote%s.txt'%year) 
    158 
    159     list_note=[]
    160     for line1 in f1:
    161         # print(line1)
    162         if line1.isspace():
    163             list_note.append('')
    164         else:
    165             list_note.append(line1)
    166          
    167     f1.close()
    168 
    169     # print(list_note)
    170     f2_w= open(ss+'trsd%s.csv'%year,'a')  
    171     # for i in range(len(list_note)):
    172     i=0
    173         # f2_r = open(ss+'/new/%s_w.txt'%list_tag[i])
    174     for line2 in f2:
    175 
    176         str11="%s,"%s"
    "%(line2.strip('
    '),list_note[i].strip('
    '))
    177         i=i+1
    178         # print(i)
    179         # print(str11)
    180         f2_w.write(str11)
    181 
    182 
    183     f2_w.close() 
    184     f2.close()
    185 if __name__ == '__main__':
    186     trsd_nonote()
    187     trsd_note()
    188     join()
  • 相关阅读:
    PHP:第一章——PHP中字符运算符、比较运算符、错误控制运算符
    PHP:第一章——PHP中逻辑运算符的使用方法
    PHP:第一章——PHP中的算术运算符/递增、递减运算符/赋值运算符
    微信小程序通过js动态修改css样式的方法(交流QQ群:604788754)
    微信小程序跨页面获取数据示例
    JavaEE资源
    java 学习路线
    想以编程为职业,现在正在看毕向东的java基础,接下来应该看什么视频,求前辈们指教。
    2017Java学习路线图,内附完整Java自学视频教程+工具经验+面试
    各种 学习路线图专区
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7427676.html
Copyright © 2020-2023  润新知