• 正则表达式-汉字的匹配方法


    unicode :   ([\u4e00-\u9fa5]+)

    unicode :  ([\u2E80-\u9FFF]+)

    utf-8  :  ([\x80-\xff]+)

     1 #encoding:utf-8
     2 import re 
     3 
     4 
     5 
     6 def main():
     7     
     8     # ([u4e00-u9fa5]+)
     9     TEST_STR_1 = u'ab123kk123'
    10     pattern_str = u'[0-9]+([u4e00-u9fa5]+)[0-9]+'
    11     pattern = re.compile (pattern_str)
    12     m = pattern.search(TEST_STR_1)
    13     print m.group() if m is not None else None
    14     print m.group(1) if m is not None else None
    15     print '
    '
    16     
    17     TEST_STR_2 = u'ab123汉字123'
    18     m = pattern.search(TEST_STR_2)
    19     print m.group() if m is not None else None
    20     print m.group(1) if m is not None else None
    21     print '
    '
    22     
    23     # ([x80-xff]+)
    24     TEST_STR_3 = 'ab123汉字123'
    25     pattern_str = '[0-9]+([x80-xff]+)[0-9]+'
    26     pattern = re.compile (pattern_str)
    27     m = pattern.search(TEST_STR_3)
    28     print m.group().decode('utf-8') if m is not None else None
    29     print m.group(1).decode('utf-8') if m is not None else None
    30     print '
    '    
    31     
    32     # ([u2E80-u9FFF]+)
    33     TEST_STR_2 = u'ab123汉字123'
    34     pattern_str = u'[0-9]+([u2E80-u9FFF]+)[0-9]+'
    35     pattern = re.compile (pattern_str)
    36     m = pattern.search(TEST_STR_2)
    37     print m.group() if m is not None else None
    38     print m.group(1) if m is not None else None
    39     print '
    '
    40     
    41     
    42     
    43 if __name__ == '__main__':
    44     main()
  • 相关阅读:
    日总结07
    Flask使用json或jsonify返回响应的数据
    日总结06
    tensorflow 代码流程02
    日总结05
    题解 P1505 [国家集训队]旅游
    数学期望
    常用软件
    HTMLHelper
    DateHelper(辅助类)
  • 原文地址:https://www.cnblogs.com/mmix2009/p/3220456.html
Copyright © 2020-2023  润新知