• 正则表达式 Python for Data Analysis 笔记


    import re

    # The regex for "one or more whitespace characters" is \s+.
    # It is written as a raw string so the backslash reaches the regex engine
    # untouched; a bare 's+' would match runs of the letter "s" instead.
    text = "foo bar\t baz \tqux"

    regex = re.compile(r'\s+')
    print(regex.split(text))  # same as re.split(r'\s+', text)
    # ['foo', 'bar', 'baz', 'qux']

    print(regex.findall(text))  # the whitespace runs that matched the pattern
    # [' ', '\t ', ' \t']
    
    text = """
    Dave dave@google.com
    Steve steve@gmail.com
    Rob rob@gmail.com
    Ryan ryan@yahoo.com
    """
    # The dot before the top-level domain is escaped (\.) so that it matches
    # only a literal ".", not an arbitrary character.
    pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'
    regex = re.compile(pattern, flags=re.IGNORECASE)  # re.IGNORECASE: ignore case
    emails = regex.findall(text)
    print(emails)
    # ['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']

    # search returns only the first match; the match object just reports the
    # start and end positions of that match in the original string.
    m = regex.search(text)
    print(m)  # <re.Match object; span=(6, 21), match='dave@google.com'>
    print(text[m.start():m.end()])  # dave@google.com

    # sub replaces every matched substring with the given string.
    redacted = regex.sub("REDACTED", text)
    print(redacted)
    # Dave REDACTED
    # Steve REDACTED
    # Rob REDACTED
    # Ryan REDACTED

    # Parentheses split the pattern into three groups: username, domain, suffix.
    pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'
    regex = re.compile(pattern, flags=re.IGNORECASE)  # re.IGNORECASE: ignore case
    m = regex.match('wesm@bright.com')
    print(m.groups())  # ('wesm', 'bright', 'com')
    print(regex.findall(text))  # with groups, findall returns a list of tuples
    # [('dave', 'google', 'com'),
    #  ('steve', 'gmail', 'com'),
    #  ('rob', 'gmail', 'com'),
    #  ('ryan', 'yahoo', 'com')]

    # In the replacement string, sub can refer to the groups of each match
    # through the backreferences \1, \2, \3 (raw string keeps the backslashes).
    labeled = regex.sub(r'Username:\1, Domain:\2, Suffix:\3', text)
    print(labeled)
    # Dave Username:dave, Domain:google, Suffix:com
    # Steve Username:steve, Domain:gmail, Suffix:com
    # Rob Username:rob, Domain:gmail, Suffix:com
    # Ryan Username:ryan, Domain:yahoo, Suffix:com
    
    # Giving the groups names (?P<name>...) lets the match object produce a
    # convenient dict keyed by group name. With re.VERBOSE, whitespace inside
    # the pattern is ignored, so each part can sit on its own line.
    pattern = r"""
    (?P<username>[A-Z0-9._%+-]+)
    @
    (?P<Domain>[A-Z0-9.-]+)
    \.
    (?P<Suffix>[A-Z]{2,4})
    """
    # re.IGNORECASE: ignore case; re.VERBOSE: allow the multi-line layout above.
    regex = re.compile(pattern, flags=re.IGNORECASE | re.VERBOSE)
    m = regex.match('wesm@bright.com')
    print(m.groupdict())
    # {'username': 'wesm', 'Domain': 'bright', 'Suffix': 'com'}
  • 相关阅读:
    【VUE3.0体验】关于路由的一些坑
    TensorFlow中的卷积函数
    TensorFlow源码安装
    ubuntu远程桌面
    TensorFlow图像处理API
    C程序员眼里的Python
    深度剖析HashMap的数据存储实现原理(看完必懂篇)
    golang 互斥锁和读写锁
    golang goroutine的调度
    golang channel的使用以及调度原理
  • 原文地址:https://www.cnblogs.com/nicole-zhang/p/15204766.html
Copyright © 2020-2023  润新知