• 正则表达式python


    import re
    
    # Basic tour of the `re` module. Every pattern is a raw string so that
    # regex escapes such as \d, \w and \1 reach the regex engine untouched.

    # re.match() only succeeds when the pattern matches at the start of the string.
    ret = re.match(r"H", "Hello Python")
    # ret.group() -> 'H'

    # Capture groups: everything before the dash, then the digits after it.
    ret = re.match(r"([^-]*)-(\d+)", "010-12345678")
    # ret.group(1) -> '010', ret.group(2) -> '12345678'

    # Backreference: \1 must repeat whatever group 1 captured, so the closing
    # tag has to equal the opening tag. This only works in a raw string (r"").
    ret = re.match(r"<([a-zA-Z]*)>\w*</\1>", "<html>hh</html>")
    # ret.group(0) -> '<html>hh</html>'

    # re.search() scans the whole string instead of matching only at the start.
    ret = re.search(r"\d+", "阅读次数为 9999")
    # ret.group() -> '9999'

    # re.findall() returns every non-overlapping match as a list.
    ret_list = re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")
    # ret_list -> ['9999', '7890', '12345']

    # re.sub() replaces every match and returns the new string (not a list).
    ret_list = re.sub(r"\d+", '998', "python = 997 python = 997")
    print('ret_list=====: %s' % ret_list)

    # re.split() cuts the string at every colon or space.
    ret_list = re.split(r":| ", "info:xiaoZhang 33 shandong")
    # ret_list -> ['info', 'xiaoZhang', '33', 'shandong']

    # Non-greedy mode: appending "?" to "*", "?", "+" or "{m,n}" makes the
    # quantifier match as little as possible.
    s = "aa2343ddd"
    r = re.match(r"aa(\d+?)", s)
    # r.group(1) -> '2'
    
    print('111111111111111111')
    # Sample URLs to try against the patterns below; swap the active
    # assignment to experiment with a different one.
    # url = 'http://www.freebuf.com'
    # url = 'https://freebuf.com/articles/es/123%e7%b1%b3%e9%9b%aa%e5%84%bf'
    # url = 'http://www.freebuf.com/157843sdf.html'
    url = 'http://www.freebuf.com/author/%e7%b1%b3%e9%9b%aa%e5%84%bf'

    # Bare domain with an optional trailing slash, e.g. https://www.freebuf.com
    domain_re = re.compile(r'https?://(\w+?\.)+\w+/?$')
    ret_list = domain_re.search(url)
    print(ret_list.group() if ret_list is not None else 'ret_list = None')

    # Folder path, e.g. https://www.freebuf.com/articles/es
    folder_re = re.compile(r'https?://(\w+?\.)+\w+(/\w+)*(/\w+/?)$')
    ret_list = folder_re.search(url)
    print(ret_list.group() if ret_list is not None else 'ret_list = None')

    # File path (last segment has an extension),
    # e.g. http://www.freebuf.com/news/157843.html
    file_re = re.compile(r'https?://(\w+?\.)+\w+(/\w+)*(/\w+\.\w+)$')
    ret_list = file_re.search(url)
    print(ret_list.group() if ret_list is not None else 'ret_list = None')
    

      

    使用 re.compile() 预先编译正则表达式,得到的模式对象可以重复使用,后续匹配时无需再次编译

    import re
    
    # Compile the pattern once and reuse the resulting pattern object.
    # The sample is named `url` so it does not shadow the builtin `str`.
    url = 'https://www.freebuf.com/page/357'

    # Group 1: last letter of the scheme ('p' for http, 's' for https).
    # Group 2: the domain that follows the literal "www." prefix.
    pattern = re.compile(r'(p|s)://www\.(.+?\..+?)/+?', re.DOTALL)
    match = pattern.findall(url)

    print(type(match))
    print(match)
    

      

    忽略大小写

    在正则表达式的最前面加上 (?i),或者在编译时传入 re.IGNORECASE 标志

    import re
    
    
    # Sample access-log text. The first record is deliberately split across
    # several lines and one record spells the protocol as "HTtP".
    str1 = """
    201.158.69.116 - - [03/Jan/2013:21:17:20 -0600] fwf[-] tip[-] 127.0.0.1:9000 0.007 0.007 MX pythontab.com GET /html/test.html http/1.1 "
    
    
    200" 2426 
    "http://a.com" "es-ES,es;q=0.8" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
    
    
    172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE HTtP/1.1" 201 649
    172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/vcc/default HTTP/1.1" 207 401
    172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/bln/31 HTTP/1.1" 207 454
    172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE HTTP/1.1" 207 649
    172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/vcc/default HTTP/1.1" 207 454
    """

    # (?i) makes "HTTP/" case-insensitive. After the protocol version, skip
    # lazily over non-digit characters and capture the 3-digit status code
    # (2xx-5xx). re.DOTALL lets "." cross the line breaks in the first record.
    pattern = re.compile(r'(?i)HTTP/.+?[^\d]+?([2345]\d{2})', re.DOTALL)
    ret = pattern.findall(str1)
    print(ret)
    

      

    点号默认不匹配换行,要想匹配换行,需要设置re.DOTALL

    # re.DOTALL lets "." match newlines too, so the lazy skip between the
    # protocol version and the 3-digit status code may span several lines.
    pattern = re.compile(r'(?i)HTTP/.+?[^\d]+?([2345]\d{2})', re.DOTALL)
    

      

  • 相关阅读:
    SpringBoot整合RabbitMQ
    NIO
    eclipse配置maven
    IDEA常用快捷键
    IDEA如何快速查看类中的属性和方法?
    Java之IO流
    JS判断对象是否包含某个属性
    Jquery获取链接请求的参数
    JS中indexOf的用法
    JS驼峰与下划线互转
  • 原文地址:https://www.cnblogs.com/andy9468/p/8386969.html
Copyright © 2020-2023  润新知