• 文件合并


    #!/usr/bin/python
    #Filename :smelter_colmerge.py
    # -*- coding:utf-8 -*-
    
    import cx_Oracle
    import sys
    import getopt,string
    import re
    import os
    import datetime
    import os.path
    import configuration
    from Common import *
    
    class kk(object):
        def __init__(self):
            pass
            
    
        def conn_db(self):
            """Open a new Oracle connection using the settings in config_test.xml.

            The values 'user_name', 'user_password' and 'dns' are read through
            configuration.configuration().loadConfigFile() and handed to
            cx_Oracle.connect().

            Returns:
                The connection object returned by cx_Oracle.connect(). The
                caller is responsible for closing it.

            Raises:
                Exception: whatever the configuration lookup or the connect
                    call raised. The error is still printed as (-1, error),
                    but it is re-raised instead of silently returning None:
                    with a None result every caller failed later with an
                    unrelated AttributeError on None.cursor().
            """
            config_file = 'config_test.xml'
            try:
                job_config = configuration.configuration()
                user_name = job_config.loadConfigFile(config_file, 'user_name')
                user_password = job_config.loadConfigFile(config_file, 'user_password')
                dns = job_config.loadConfigFile(config_file, 'dns')
                return cx_Oracle.connect(user_name, user_password, dns)
            except Exception as e:
                # Parenthesised tuple: prints "(-1, <error>)" on Python 2 and 3.
                print((-1, e))
                raise
           
    
        def c_process_id(self,pid):
            """Call the stored procedure common.process_id and return its result.

            Args:
                pid: value passed as the first argument of the procedure.

            Returns:
                The value of the cx_Oracle.NUMBER variable that is passed as
                the procedure's second argument, read after the call.
            """
            db = self.conn_db()
            try:
                cursor = db.cursor()
                pro_pid = cursor.var(cx_Oracle.NUMBER)
                cursor.callproc("common.process_id", [pid, pro_pid])
                return pro_pid.getvalue()
            finally:
                # The original close() sat after the return statement and was
                # never executed; finally makes sure the connection is released.
                db.close()
            
        def prhelp(self):
            """Print the command-line usage summary to standard output."""
            usage_text = ''' Usage: smelter_colmerge.py [options]
            Options:     
            -h, --help            show this help message and exit
            -i, --interid         input file_id with a comma segmentation,can more values input
            -n, --processid       input process id
            -o, --targetdir       input targetdir
            -b, --before          assign to type code
            -t, --tableid         assign to table id 
    
            Example: python smelter_colmerge.py   -n 100008111 -o /hadoop/pigtmp/yu  -i 'I10173227','I10173228'           
               or    python smelter_colmerge.py   -n 100008111 -o /hadoop/pigtmp/yu  -b FILE_COLLECT -t 26
            '''
            print(usage_text)
        #Parameters processing   
        def fun_param_input(self):
            TYPCD=0
            INTERID=0
            
            try:
                if len(sys.argv) > 6:
                    opts, args = getopt.getopt(sys.argv[1:],"i:n:o:b:t:h", ["interid=","processid=","targetdir=","before=","tableid=","help"])
                    #print(sys.argv)
                    #print(opts)
                    #print(args)
                    for a,o in opts:
                        if a in ('-i', '--interid'):
                            INTERID=o	
                            #print INTERID
                        elif a in ('-n','--processid'):
                            PROCESSID=o
                            #print PROCESSID
                        elif a in ('-o','--targetdir'):
                            targetdir=o
                            #print targetdir
                        elif a in ('-b','--before'):
                            TYPCD=o
                            #print o
                        elif a in ('-t','--tableid'):
                            table_id=o  
                            #print o  
                                         
                        elif a in ('-h', '--help'):                    
                            self.prhelp()
                            sys.exit()
                            
                    
                    if os.path.exists(targetdir):
                        if INTERID==0:
                            #print  PROCESSID,targetdir,TYPCD,0,table_id
                            return PROCESSID,targetdir,TYPCD,0,table_id
                            
                        else:
                            dd=INTERID.split(',')
                            #print '###################'
                            #print dd
                            #print '###################'
                            #dd=re.split(',',INTERID)
                            #print  PROCESSID,targetdir,0,dd,0
                            return PROCESSID,targetdir,0,dd,0
                    else:
                        meg='%s not exists'%target_directory
                        print (-1,meg)
                        
                    
                   
                else:
                    print 'Please input right parameters'
                    self.prhelp()
                    sys.exit()
            except Exception as e:
                return (-1,e)
                
                
        # find file        
        def fun_find_file(self):
            """Look up the log_file rows of the source files that are to be merged.

            The parameters come from fun_param_input(). Two modes exist:

            * third parameter == 0: every file_id of the supplied list is
              looked up on its own (status 'S', type other than FILE_MERGE)
              and must match exactly one row; at least two ids are needed.
            * otherwise: the third parameter is translated with
              myexec_type().code_to_id(), the highest process_id with status
              'S' for that type and table is selected, and all of its rows
              (status 'S', type other than FILE_MERGE) are used; at least two
              rows are needed.

            Returns:
                (file_count, rows, merge_type_id, targetdir, process_id,
                table_id) on success. Each row is (if_file_name, table_id,
                file_id, file_type, current_directory, data_date, hour_id);
                process_id is the value returned by c_process_id(). In the
                file-list mode table_id is taken from the first row.
                (0, 0, 0, 0, 0, 0) when the parameters are unusable or too few
                files were found; a (-1, message) line is printed first.
            """
            failure = (0, 0, 0, 0, 0, 0)

            params = self.fun_param_input()
            # fun_param_input() reports problems as (-1, error); unpacking that
            # into five names used to raise a ValueError here.
            if not isinstance(params, tuple) or len(params) != 5:
                print((-1, 'invalid parameters: %r' % (params,)))
                return failure
            PROCESSID, targetdir, bftype, f_list, table_id = params

            process_id = self.c_process_id(PROCESSID)
            db = self.conn_db()
            try:
                cursor = db.cursor()
                dm_t = myexec_type()
                tp_id = dm_t.code_to_id('FILE_MERGE')

                if bftype == 0:
                    # f_list is the integer 0 when -i was not given.
                    if not f_list or len(f_list) <= 1:
                        print((-1, 'please input two or more'))
                        return failure

                    source_file = []
                    mg = []
                    for file_id in f_list:
                        cursor.execute(
                            "select distinct if_file_name,table_id,file_id,file_type,current_directory,data_date,hour_id from log_file where file_id=:fl_id and status='S'and type_id <>:tp_id",
                            fl_id=file_id, tp_id=tp_id)
                        rows = cursor.fetchall()
                        if len(rows) == 1:
                            # Append exactly once. The original catch-up loop
                            # appended the same row repeatedly after an earlier
                            # bad id, hiding the failure behind duplicates.
                            source_file.append(rows[0])
                        else:
                            mg.append('%s is wrong parameter' % file_id)

                    if mg:
                        # Report even a single bad id (the original only
                        # printed when there were at least two).
                        print((-1, mg))
                        return failure
                    return (len(f_list), source_file, tp_id, targetdir,
                            process_id, source_file[0][1])

                bf_id = dm_t.code_to_id(bftype)
                cursor.execute(
                    "select max(process_id) from log_file where type_id=:tpp_id and table_id=:tbb_id and status='S'",
                    tpp_id=bf_id, tbb_id=table_id)
                pcsid = cursor.fetchall()[0][0]
                cursor.execute(
                    "select distinct if_file_name, table_id,file_id,file_type,current_directory,data_date,hour_id  from log_file where  status='S'  and process_id=:pdd and table_id=:tbb_id and type_id <>:tp_id",
                    pdd=pcsid, tbb_id=table_id, tp_id=tp_id)
                rows = cursor.fetchall()
                if len(rows) >= 2:
                    return (len(rows), list(rows), tp_id, targetdir,
                            process_id, table_id)

                print((-1, 'File number less than two'))
                return failure
            finally:
                # Every branch returns, so the original trailing close() was
                # never reached.
                db.close()
       
       
        # Check whether the merged target file already exists
        def fun_targetdir(self):
            f_num,source_file,tp_id,targetdir,process_id,table_id=self.fun_find_file()
            if f_num == 0:
                
                return 0,0,0,0,0,0
            else:
                newname = 'merge'+source_file[0][0]
                
                targetname = os.path.join(targetdir,newname)
                
                if os.path.exists(targetname):
                    meg='%s already exits'%targetname
                    print (-1,meg)
                    return 0,0,0,0,0,0
                else:
                    
                    #print f_num,source_file,tp_id,targetdir,process_id,table_id
                    return f_num,source_file,tp_id,targetdir,process_id,table_id
        
        
        
        # Check that the field layout is consistent across the source files
        def fun_colnum(self):
            """Check that the first line of every source file has a consistent layout.

            The file list comes from fun_targetdir(). Rows whose file_id
            starts with 'I' are collected in i_file, all other rows in v_file.
            For every file common.Log_file_initial and common.Log_file_start
            are called, then the first line is read and checked:

            * 'I' files when max(position_end) of dm_if_column is greater
              than 1: the line length must equal that value.
            * 'I' files otherwise: the line is split on the table's
              fields_terminated_by value (quotes removed) and the field count
              must be equal for all 'I' files seen so far.
            * other files: the same field-count check, using the
              fields_terminated_by value of dm_if_filemask for the
              IF_FILE_VERIFY_MASK type.

            Returns:
                (f_num, i_file, v_file, source_file, tp_id, targetdir,
                process_id, table_id, v_termin) when every file passes.
                Nine zeros when there is nothing to merge or a check failed
                (the original returned eight zeros or None, which the
                nine-way unpacking in fun_merge could not handle).
                (-1, error) when an exception is raised.
            """
            failure = (0, 0, 0, 0, 0, 0, 0, 0, 0)
            try:
                (f_num, source_file, tp_id, targetdir,
                 process_id, table_id) = self.fun_targetdir()
                if f_num == 0:
                    # Nothing to check. Return before the lookups below, which
                    # index the first result row and would fail for table_id 0.
                    return failure

                db = self.conn_db()
                try:
                    cursor = db.cursor()
                    vf_id = myexec_type().code_to_id('IF_FILE_VERIFY_MASK')

                    cursor.execute('select fields_terminated_by from dm_if_table where table_id=:tbl_id', tbl_id=table_id)
                    f_termin = cursor.fetchall()[0][0].replace("'", "").replace('"', '')

                    cursor.execute('select max(position_end),max(column_seq) from (select position_start,column_seq,position_end from dm_if_column where table_id=:tbl_id order by 2)', tbl_id=table_id)
                    pos_end = cursor.fetchall()[0][0]
                    # Explicit None test: Python 2 treats "None > 1" as False,
                    # Python 3 raises a TypeError.
                    fixed_width = pos_end is not None and pos_end > 1

                    cursor.execute('select fields_terminated_by from dm_if_filemask where table_id=:tbl_id and type_id=:ty_id', tbl_id=table_id, ty_id=vf_id)
                    v_termin = cursor.fetchall()[0][0].replace("'", "").replace('"', '')

                    rows = source_file[:f_num]
                    i_file = [row for row in rows if row[2].startswith('I')]
                    v_file = [row for row in rows if not row[2].startswith('I')]

                    i_counts = []   # field counts of the 'I' files seen so far
                    v_counts = []   # field counts of the other files seen so far
                    mgg = []        # messages collected for the 'I' files
                    failed = False

                    for row in rows:
                        s_name = os.path.join(row[4], row[0])
                        cursor.callproc("common.Log_file_initial", [process_id, row[1], row[2], tp_id, row[3]])
                        cursor.callproc("common.Log_file_start", [process_id, row[1], row[2], tp_id, row[3], row[5], row[6], row[0], row[4]])
                        try:
                            ff = open(s_name, 'r')
                        except Exception as e:
                            print((-1, '%s open error %s' % (s_name, e)))
                            # The original went on to read from a missing or
                            # stale file object; propagate the real error to
                            # the outer handler instead.
                            raise
                        try:
                            line = ff.readline().strip('\n')
                        finally:
                            # Closed in every branch (the fixed-width branch
                            # used to leave the file open).
                            ff.close()

                        if row[2].startswith('I'):
                            if fixed_width:
                                if len(line) != pos_end:
                                    # NOTE(review): the original does not call
                                    # common.Log_file_fail here; kept that way.
                                    mgg.append('%s  number of field not consistent' % row[2])
                                    failed = True
                            else:
                                i_counts.append(len(line.split(f_termin)))
                                if max(i_counts) != min(i_counts):
                                    # Build the message before logging it; the
                                    # original passed the previous value.
                                    meg = '%s  number of field not consistent' % row[2]
                                    cursor.callproc("common.Log_file_fail", [process_id, row[1], row[2], tp_id, row[3], meg, 0, 0])
                                    mgg.append(meg)
                                    failed = True
                        else:
                            v_counts.append(len(line.split(v_termin)))
                            if max(v_counts) != min(v_counts):
                                meg = 'The number of field not consistent'
                                cursor.callproc("common.Log_file_fail", [process_id, row[1], row[2], tp_id, row[3], meg, 0, 0])
                                print((-1, meg))
                                failed = True

                    if mgg:
                        # Report even a single message (the original only
                        # printed when there were at least two).
                        print((-1, mgg))

                    if failed:
                        return failure
                    return (f_num, i_file, v_file, source_file, tp_id,
                            targetdir, process_id, table_id, v_termin)
                finally:
                    # The original close() was skipped by every return above.
                    db.close()
            except Exception as e:
                return (-1, e)
                        
        #merge files 
        def fun_merge(self):
            try:
                db=self.conn_db()
                cursor=db.cursor()
                readsize = 1024
                k=0
                z=0
                f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin=self.fun_colnum() 
                #print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                #print f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id 
                                                                                                     
                if f_num==0:
                    pass           
                else:                       
                    if len(i_file)==0:
                        pass
                    else:
                        newname1 = 'merge'+i_file[0][0]
                        targetname1=os.path.join(targetdir,newname1)
                        t_file1=open(targetname1,'wb')
                         
                        while z< len(i_file):
                            if i_file[z] in i_file:
                                f_name =os.path.join(i_file[z][4],i_file[z][0])                                                                        
                                f_file=open(f_name,'rb')
                                while 1:
                                    filebytes = f_file.read(readsize)
                                    if not filebytes:
                                        break
                                    else:
                                        t_file1.write(filebytes)
                                t_file1.write(os.linesep)
                                cursor.callproc("common.Log_file_success",[process_id,i_file[z][1],i_file[z][2],tp_id,i_file[z][3],0])
                                f_file.close()
                            z+=1                   
                        t_file1.close
                        
                    if len(v_file)==0:  
                        pass
                    else:
                        newname2 = 'merge'+v_file[0][0] 
                        targetname2=os.path.join(targetdir,newname2)
                        t_file2=open(targetname2,'wb')                                                                        
                        
                        a=0
                        b=0
                        f_min=0
                        lst_f=[]
                        file_line=[]
                        cursor.execute('select column_seq from DM_IF_VERIFY_FILE where table_id=:tbl_id and if_filename=1',tbl_id=table_id)        
                        c=cursor.fetchall()
                        column_seq=c[0][0]
                        while k < len(v_file):
                            v_name =os.path.join(v_file[k][4],v_file[k][0])                    
                            v_f=open(v_name,'rb')                    
                            line=v_f.readline()
                            line=line.strip('\n')
                            ll=line.split(v_termin)                      
                            file_line.append(ll)
                            
                            cursor.callproc("common.Log_file_success",[process_id,v_file[k][1],v_file[k][2],tp_id,v_file[k][3],0])
                            v_f.close()
                            k+=1
                        #print '9999999999999999999999999999999999999999999'
                        #print  file_line 
                
                        while a<len(ll):
                            b=0
                            f_min=0       
                            while b<len(v_file):
                                if a==column_seq-1:
                                    f_min=newname2
                                else:                                                            
                                    file_line[b][a]=int(file_line[b][a])
                                    f_min=f_min+file_line[b][a]                                                   
                                b=b+1
                            lst_f.append(f_min) 
                            a=a+1   
                        
                        #print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!"                        
                        #print lst_f      
                          
                        #print "$$$$$$$$$$$$$$$$$$$$$$$$$$"
                        q=0
                        while q<len(lst_f):
                            lst_f[q]=str(lst_f[q])
                            q+=1
                        #print lst_f
                        fstring="|".join(lst_f)
                        
                        #print fstring
                        t_file2.write(fstring)
                        t_file2.close()
                    #print 'dddddddddddddddddddddddddddd'
                    #print k                
                    #print z    
                    if k+z==f_num:
                        print (0,f_num)
                    
                db.close()
                          
            except Exception as e:
                return (-1,e)
           
                        
    if __name__ == '__main__':
        # Script entry point: build the merger and run the whole pipeline once.
        merger = kk()
        merger.fun_merge()
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    wpf中TreeView双击事件中获取当前节点、父节点以及子节点
    VS2019 打包WPF安装程序最新教程(图文详解)
    c#常用委托
    ffmpeg 安装和录屏(python)
    python os.walk
    使用Scapy解析pcap格式数据
    python os.path模块
    subprocess.Popen参数介绍
    Python 发送4种形式的邮件
    Linux终端命令之screen X
  • 原文地址:https://www.cnblogs.com/yuchunju/p/2612283.html
Copyright © 2020-2023  润新知