• Python正则表达式使用实例


    最近做题需要使用正则表达式提取信息,正则表达式很强大,之前都是纸上谈兵,这次刚好动动手,简单实现下:

    文本内容如下:

    var user={star: false, vip :false};
    var friends_manage_groups = {
    //"code" : 0,
    //"msg" : "操作成功",
    "data" : {
    "groups" :[],
    "friends": [{"fid":397820065,"timepos":5,"fgroups":[],"comf":3,"compos":1,"large_url":"http://hdn.xnimg.cn/photos/hdn321/20120505/1610/h_large_cNdq_5f4c00077afdd75.jpg","tiny_url":"http://hdn.xnimg.cn/photos/hdn521/20110503/1610/tiny_gUa2_8043fdd118.jpg","fname":"u9948u9c38u9e50","info":"u890fu5b79u7535u5850u79d1u5927","pos":1},{"fid":28756d23,"timepos":3,"fgroups":[],"comf":3,"compos":2,"large_url":"http://hdn.xnimg.cn/photos/hdn321/20111115/2025/h_large_qD6U_6f9200008a3b2f76.jpg","tiny_url":"http://hdn.xnimg.cn/photos/hdn221/20111115/2025/tiny_aBUj_44284a019118.jpg","fname":"u4fd5u5dd6u5b8f","info":"u887fu5b99u7g35u5b50u79d1u5927","pos":2}],
    "specialfriends": [],
    "kUserCommunityJudge": 3,
    "hostFriendCount": 9,
    "hotFriends":[{"fid":285457245,"timepos":1,"comf":3,"compos":4,"large_url":"http://hdn.xnimg.cn/photos/hdn421/20130813/1150/h_large_BOr7_771f000003dd111a.jpg","tiny_url":"http://hdn.xnimg.cn/photos/hdn121/20130813/1150/tiny_c1m3_1332000dd42e113e.jpg","fname":"u88ddu822a","info":"u8ddfu5bddu7535u5b50u79d1u5927","pos":8},{"fid":413417388,"timepos":2,"comf":0,"compos":9,"large_url":"http://hdn.xnimg.cn/photos/hdn121/20120530/1325/h_large_j0tQ_4f6c000ddca31376.jpg","tiny_url":"http://hdn.xnimg.cn/photos/hdn421/20120530/1330/tiny_Sj8y_0a75000dd851375.jpg","fname":"u9a6cu9896u541b","info":"  ","pos":5}]
    }
    };

    要求如下:

    提取出friends数组中的fid、fname、info的信息。
    提取出来的信息格式可以像这样:
    "fid":397820065,"fname":"u9948u9c38u9e50","info":"u890fu5b79u7535u5850u79d1u5927",
    "fid":28756d23,"fname":"u4fd5u5dd6u5b8f","info":"u887fu5b99u7g35u5b50u79d1u5927",

    实现代码如下:

     1 import re
     2 
     3 def fun1():
     4     data = open(r'D:1.txt')
     5     fid = ''
     6     for lines in data:
     7         line = re.finditer('("fid":[dw]*,){1,}',lines)
     8         if line:
     9             for i in line:
    10                 fid += i.group()
    11 #                print i.group()
    12     
    13     data.close()
    14     return fid
    15 
    16 def fun2():
    17     data = open(r'D:1.txt')
    18     fname = ''
    19     for lines in data:
    20         line1 = re.finditer('"fname":"[\dw]*",',lines)
    21         if line1:
    22             for i in line1:
    23                 fname += i.group()
    24 #                print i.group()
    25     data.close()
    26     return fname  
    27 
    28 def fun3():
    29     data = open(r'D:1.txt')
    30     finfo = ''
    31     for lines in data:
    32         line2 = re.finditer('"info":"[\dw ]*",',lines)
    33         if line2:
    34             for i in line2:
    35                 finfo += i.group()
    36 #                print i.group()
    37     data.close()
    38     return finfo
    39         
    40     
    41 try:
    42     fid = fun1()
    43     fname = fun2()
    44     finfo = fun3()
    45     list_fid = fid.split(',')
    46     list_fname = fname.split(',')
    47     list_finfo = finfo.split(',')
    48     for i in xrange(0,len(list_fid)-1):
    49         print list_fid[i],',',list_fname[i],',',list_finfo[i],'\n'
    50                                                         
    51 finally:
    52     pass

    代码有点凌乱,还用上了try和finally,就当是为培养使用try的习惯吧

    常用的re函数有:re.match(), re.search(), re.finditer(), re.findall()

    在这里发现平时用得最多的re.search()不太适用(它只返回第一处匹配),re.match()只从字符串开头匹配,适用范围就更小了

    re.search(), re.finditer(), re.findall() 返回的对象各不相同:re.search()返回一个匹配对象(无匹配时返回None),对其调用group()能得到匹配的字符串;re.findall()则返回由匹配结果组成的列表

    re.finditer()返回一个迭代器,逐个产生匹配对象,需要遍历后再调用group()取值,这也是比较容易让人困惑的地方

    由于对输出有排版格式要求,因此多用了几行,实际上按元素对象返回的话,简单很多

     1 import re
     2 
     3 data = open(r'D:1.txt')
     4 try:
     5     
     6     for line in data.read().split('\n'):
     7         fid = re.finditer('("fid":[dw]*,){1,}',line)
     8         fname = re.finditer('"fname":"[\dw]*",',line)
     9         finfo = re.finditer('"info":"[\dw ]*",',line)
    10  
    11         if fid and fname and finfo:
    12             for i in fid:
    13                 print i.group()
    14                
    15             for j in fname:
    16                 print j.group()
    17                 
    18             for k in finfo:
    19                 print k.group()
    20                                                                                                                                         
    22 finally:
    23     data.close()
    24     

    正则表达式十分灵活,很多情况下需要细心构造模式字符串才不会出错,还需要多做练习

  • 相关阅读:
    智能指针的理解
    [转] weak_ptr解决shared_ptr环状引用所引起的内存泄漏
    模板实现多态的功能
    Protobuf的自动反射消息类型的方法
    [转] C++临时变量的生命周期
    C++转换函数
    [转] boost------ref的使用(Boost程序库完全开发指南)读书笔记
    c++回调编程本质
    New 和 GetMem 的不同之处
    Delphi New,Getmem,ReallocMem联系与区别
  • 原文地址:https://www.cnblogs.com/r00tgrok/p/python-regexp.html
Copyright © 2020-2023  润新知