• Python正则表达式re模块练习


    '''
    import re

    # Regular expressions perform fuzzy (pattern-based) matching on strings.
    # str.find, by contrast, only locates one exact substring and returns
    # the index where it starts.
    s = "hello yuan"
    position = s.find("yuan")
    print(position)  # 6

    # re.findall collects every non-overlapping run of lowercase letters.
    s2 = "1,2,34,100,yuan,rain,alvin,45"
    lowercase_run = "[a-z]+"
    ret = re.findall(lowercase_run, s2)
    print(ret)  # ['yuan', 'rain', 'alvin']

    #################### Metacharacters ####################

    # (1) Wildcard ".": in the default mode it matches any character except
    #     a newline.
    wildcard = "y..n"

    ret = re.findall(wildcard, "hello yuan,hello rain,hello yabn")
    print(ret)  # ['yuan', 'yabn']

    # Passing re.S (DOTALL) changes the mode: "." then matches every
    # character, including the newline embedded in the subject below.
    ret = re.findall(wildcard, "hello yu\nn,hello rain,hello yabn", flags=re.S)
    print(ret)  # ['yu\nn', 'yabn']

    # (2) Repetition: * = [0, inf), + = [1, inf), ? = [0, 1], {m,n} = explicit range
    # Key point: quantifiers are greedy by default.
    # Appending "?" to a quantifier turns greedy matching off (lazy matching).
    #
    # Patterns are written as raw strings (r"...") so that "\d" reaches the
    # regex engine unchanged instead of being an invalid string escape.

    ret = re.findall(r"\d", "1,2,3,55,yuan,33")
    print(ret)  # ['1', '2', '3', '5', '5', '3', '3']

    ret = re.findall(r"hi \d{2}", "hi 1,hi 66,hi 188")
    print(ret)  # ['hi 66', 'hi 18']

    ret = re.findall(r"hi \d*", "hi 1,hi 66,hi 188")
    print(ret)  # ['hi 1', 'hi 66', 'hi 188']

    ret = re.findall(r"\d+", "66,188,2,12222,hello")
    print(ret)  # ['66', '188', '2', '12222']

    # "+?" is the lazy form of "+": each match takes as few digits as possible.
    ret = re.findall(r"\d+?", "66,188,2,12222,hello")
    print(ret)  # ['6', '6', '1', '8', '8', '2', '1', '2', '2', '2', '2']

    ret = re.findall(r"hi \d?", "hi 1,hi 66,hi 188,hi ")
    print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']

    # {0,1} is equivalent to "?".
    ret = re.findall(r"hi \d{0,1}", "hi 1,hi 66,hi 188,hi ")
    print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']

    # {1,} is equivalent to "+".
    ret = re.findall(r"hi \d{1,}", "hi 1,hi 66,hi 188,hi ")
    print(ret)  # ['hi 1', 'hi 66', 'hi 188']

    # {0,} is equivalent to "*".
    ret = re.findall(r"hi \d{0,}", "hi 1,hi 66,hi 188,hi ")
    print(ret)  # ['hi 1', 'hi 66', 'hi 188', 'hi ']

    # Lazy bounded range: takes the minimum (one digit) for every match.
    ret = re.findall(r"hi \d{1,3}?", "hi 1,hi 66,hi 188,hi ")
    print(ret)  # ['hi 1', 'hi 6', 'hi 1']


    # (3) Anchors: ^ pins the match to the start of the string, $ to its end.

    # "good" does not sit at the very start of this subject, so ^ rejects it.
    prefix_anchored = "^good/.{4}/.{4}"
    ret = re.findall(prefix_anchored, "hello/good/food/meat")
    print(ret)  # []

    # Anchored on both sides: the whole subject has to fit the pattern.
    fully_anchored = "^good/.{4}/meat$"
    ret = re.findall(fully_anchored, "good/aaaa/meat")
    print(ret)  # ['good/aaaa/meat']


    # (4) Character set []: matches any single character listed inside the
    #     brackets. Two characters are special inside a set: "-" denotes a
    #     range and a leading "^" negates the set.
    ret = re.findall("yu[ac]n", "yuan yubn yucn yuacn")
    print(ret)  # ['yuan', 'yucn']

    # A comma inside the brackets is just another member of the set.
    ret = re.findall("yu[a,c]n", "yuan yubn yucn yu,n")
    print(ret)  # ['yuan', 'yucn', 'yu,n']

    # Listing every digit and using the range 0-9 give the same result.
    digit_subject = "yuan yu8n yucn yu2n"
    ret = re.findall("yu[0123456789]n", digit_subject)
    print(ret)  # ['yu8n', 'yu2n']

    ret = re.findall("yu[0-9]n", digit_subject)
    print(ret)  # ['yu8n', 'yu2n']

    # Several ranges can be combined in a single set.
    mixed_subject = "yuan,22,alvin,45,rain"
    ret = re.findall("[a-z0-9A-Z]+", mixed_subject)
    print(ret)  # ['yuan', '22', 'alvin', '45', 'rain']

    # Negated set: runs of anything that is not a digit (commas included).
    ret = re.findall("[^0-9]+", mixed_subject)
    print(ret)  # ['yuan,', ',alvin,', ',rain']

    # (5) Grouping () and alternation |
    # "(?:...)" is a non-capturing group: findall then returns the whole
    # match instead of only the text captured by the group.
    # Raw strings keep "\." and "\d" intact for the regex engine rather than
    # relying on Python passing unknown string escapes through.
    ret = re.findall(
        r"https?://www\.[a-zA-Z0-9]+\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']

    # (6) Escape character \
    # - Gives some ordinary characters a special meaning:
    #   \d is a digit ([0-9]); \w is a word character ([0-9a-zA-Z_] for ASCII).
    # - Removes the special meaning of metacharacters, e.g. "\." is a literal dot.

    ret = re.findall(r"\d+", "123a45bcd678")
    print(ret)  # ['123', '45', '678']

    # (7) Grouping with ()

    # 1. re.findall(): with no capturing group it returns the full matches;
    #    with one capturing group it returns only what that group captured.
    #    Patterns are raw strings so "\." and "\w" are not string escapes.
    ret1 = re.findall(
        r"https?://www\.\w+\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret1)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']

    # Capturing the scheme only.
    ret2 = re.findall(
        r"(https?)://www\.\w+\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret2)  # ['http', 'https', 'http']

    # Capturing the site name only.
    ret3 = re.findall(
        r"https?://www\.(\w+)\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret3)  # ['baidu', 'jd', 'python']

    # 2. re.search(): scans the string and returns a Match object for the
    #    first match only (or None when nothing matches).
    #    Patterns are raw strings so "\." and "\w" are not string escapes.
    ret4 = re.search(
        r"https?://www\.\w+\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret4)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
    print(ret4.group())  # http://www.baidu.com

    # Named group (?P<name>...): the captured text is retrieved by name.
    ret5 = re.search(
        r"https?://www\.(?P<mingzi>\w+)\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret5)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
    print(ret5.group("mingzi"))  # baidu

    # Here the group named "yuming" captures the URL scheme.
    ret6 = re.search(
        r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret6.group("yuming"))  # http

    # The group named "houzhui" captures the suffix (com or cn).
    ret7 = re.search(
        r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?P<houzhui>com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret7.group("houzhui"))  # com

    # 3. re.match(): only attempts a match at the beginning of the string.
    #    Patterns are raw strings so "\." and "\w" are not string escapes.
    ret8 = re.match(
        r"https?://www\.\w+\.(?:com|cn)",
        "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret8.group())  # http://www.baidu.com

    # The subject does not start with a URL, so re.match returns None.
    # Calling .group() on None would raise AttributeError, so test the
    # result before using it.
    ret9 = re.match(
        r"https?://www\.\w+\.(?:com|cn)",
        "huchangxi,http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
    )
    print(ret9.group() if ret9 is not None else None)  # None

    '''


  • 相关阅读:
    maven项目从本地向本地仓库导入jar包
    身份证图片信息获取
    使用阿里云短信服务发送短信验证码
    网上爬取快递100的快递公司名称和公司编码
    快递100通过快递单号实时查询物流信息
    @autowired注入静态变量
    MySql多机优化---读写分离流程分析
    MySQL单机优化---SQL优化
    MySQL单机优化---分表、分区、分库
    MySQL数据库优化
  • 原文地址:https://www.cnblogs.com/A121/p/16110268.html
Copyright © 2020-2023  润新知