• 【python练习】正则表达式练习


    import re
    def process(input_data):
        """Extract IMSI/MSISDN for subscribers that have both APN 0 and APN 33.

        The input file is a sequence of subscriber records. Each record starts
        with a ``<SUBBEGIN`` line and ends with a ``<SUBEND`` line, e.g.::

            <SUBBEGIN
                IMSI=1243560615528273;
                MSISDN=986768559232;
                VLRLIST=10;
                OPTGPRS=3-33-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
                OPTGPRS=1-0-504-241-33-NONE-0-NONE-00000000-65535-0-0-PS_APN-NONE-65535-1;
                CHARGE_GLOBAL=3;
            <SUBEND

        A record is selected when it contains at least one ``OPTGPRS`` entry whose
        second dash-separated field is exactly ``33`` and at least one whose
        second field is exactly ``0``.

        :param input_data: path of the subscriber information file
        :return: list of strings, one per selected record, formatted like
            ``IMSI=1243560615528273;MSISDN=986768559232``; selected records in
            which no IMSI/MSISDN pair can be found are skipped
        """
        # Raw strings keep the regex escapes intact. The trailing word boundary
        # makes sure "33" does not also match "330" and "0" does not match "05".
        apn_33_pattern = re.compile(r'OPTGPRS=\d+-33\b')
        apn_0_pattern = re.compile(r'OPTGPRS=\d+-0\b')
        # Non-greedy so the match stops at the first MSISDN after the IMSI.
        identity_pattern = re.compile(r'IMSI=.+?MSISDN=\d+')

        result = []
        record = None  # fields of the record being read; None outside a record
        with open(input_data) as f:
            for raw_line in f:
                line = raw_line.strip()  # drop the leading tab and the newline
                if line == '<SUBBEGIN':
                    record = []
                elif line == '<SUBEND':
                    if record is not None:
                        # Fields already end with ';', so a plain join yields
                        # "IMSI=...;MSISDN=...;VLRLIST=...;...".
                        text = ''.join(record)
                        if apn_33_pattern.search(text) and apn_0_pattern.search(text):
                            found = identity_pattern.search(text)
                            if found:
                                result.append(found.group())
                    record = None
                elif record is not None:
                    record.append(line)
        return result


    if __name__ == '__main__':
        for entry in process('input_data.txt'):
            print(entry)

    2、代码优化:通过类实现

    import re
    
    class apnInfoFinder():
        """Find subscribers that have both APN 0 and APN 33 in a subscriber file.

        The file consists of records delimited by ``<SUBBEGIN`` and ``<SUBEND``
        lines. A record is selected when it has at least one ``OPTGPRS`` entry
        whose second dash-separated field is exactly ``33`` and at least one
        whose second field is exactly ``0``. For each selected record the text
        from ``IMSI=`` up to the digits after ``MSISDN=`` is collected.

        Every step rebuilds its own state, so calling a method twice does not
        duplicate entries.
        """

        # Raw strings keep the regex escapes intact. The trailing word boundary
        # makes sure "33" does not also match "330" and "0" does not match "05".
        _APN_33 = re.compile(r'OPTGPRS=\d+-33\b')
        _APN_0 = re.compile(r'OPTGPRS=\d+-0\b')
        # Non-greedy so the match stops at the first MSISDN after the IMSI.
        _IDENTITY = re.compile(r'IMSI=.+?MSISDN=\d+')

        def __init__(self, input_data):
            """Remember the input file path and initialise empty working state.

            :param input_data: path of the subscriber information file
            """
            self.file = input_data
            self.msglist = []      # stripped lines of the input file
            self.start_index = []  # indexes of '<SUBBEGIN' lines in msglist
            self.stop_index = []   # indexes of '<SUBEND' lines in msglist
            self.result = []       # 'IMSI=...;MSISDN=...' strings found so far

        # Step 1: read the file into a list of stripped lines.
        def getMsgList(self):
            """Read the input file and return its lines without surrounding whitespace.

            :return: list such as ``['<SUBBEGIN', 'IMSI=1243560615528273;', ...]``
            """
            with open(self.file) as f:
                self.msglist = [line.strip() for line in f]
            return self.msglist

        # Step 2: locate the record delimiters.
        def getNewList(self, list):
            """Record the positions of the ``<SUBBEGIN`` and ``<SUBEND`` markers.

            :param list: stripped lines, as returned by :meth:`getMsgList`
            :return: tuple ``(start_index, stop_index)`` of index lists
            """
            self.start_index = [i for i, line in enumerate(list) if line == "<SUBBEGIN"]
            self.stop_index = [i for i, line in enumerate(list) if line == "<SUBEND"]
            return self.start_index, self.stop_index

        # Step 3: rebuild every record and apply the patterns.
        def getFinder(self, lenlist, list):
            """Collect the IMSI/MSISDN text of every record having APN 0 and APN 33.

            :param lenlist: only its length is used; it caps how many records are
                examined (callers pass ``self.start_index``)
            :param list: stripped lines, as returned by :meth:`getMsgList`
            :return: list of strings like
                ``IMSI=1243560615528273;MSISDN=986768559232``; selected records in
                which no IMSI/MSISDN pair can be found are skipped
            """
            self.result = []
            # zip stops at the shorter list, so an unbalanced file cannot raise
            # IndexError here.
            bounds = zip(self.start_index[:len(lenlist)], self.stop_index)
            for begin, end in bounds:
                # Fields already end with ';', so a plain join yields
                # "<SUBBEGINIMSI=...;MSISDN=...;VLRLIST=...;...".
                record = ''.join(list[begin:end])
                if not (self._APN_33.search(record) and self._APN_0.search(record)):
                    continue
                found = self._IDENTITY.search(record)
                if found:
                    self.result.append(found.group())
            return self.result

        # Step 4: run the whole pipeline and write the result file.
        def outPut(self, output_path='output.csv'):
            """Run all steps and write one result per line to a text file.

            :param output_path: destination file; defaults to ``output.csv`` in
                the current working directory
            """
            self.getMsgList()
            self.getNewList(self.msglist)
            self.getFinder(self.start_index, self.msglist)
            with open(output_path, 'w') as out_result:
                out_result.writelines(line + '\n' for line in self.result)
    
    if __name__ == '__main__':
        # Script entry point: process the sample input file and write the result file.
        finder = apnInfoFinder('input_data.txt')
        finder.outPut()
  • 相关阅读:
    commons-logging.jar 和 log4j.jar 的关系
    百钱买百鸡
    reflect
    golang结构体、接口、反射
    golang文件操作
    sqlx使用说明
    go example
    goroutine
    生成二维码
    method&interface
  • 原文地址:https://www.cnblogs.com/zhaoyujiao/p/15429481.html
Copyright © 2020-2023  润新知