# Disabled lesson script: a walkthrough of the `re` module kept inside a
# module-level string so that importing or running this file executes none of
# it. The literal is raw (r'''...''') so the regex escapes below (\d, \w, \.)
# are not parsed as escapes of the enclosing string (which triggers "invalid
# escape sequence" warnings) and so the text stays valid Python when it is
# pasted out and run. The trailing comment on each print() is its output.
r'''
import re

# Regular expressions: pattern-based (fuzzy) matching on strings.
s = "hello yuan"
print(s.find("yuan"))  # 6
s2 = "1,2,34,100,yuan,rain,alvin,45"
ret = re.findall(r"[a-z]+", s2)
print(ret)  # ['yuan', 'rain', 'alvin']

#################### Metacharacters ####################
# (1) Wildcard .: by default matches any character except a newline.
ret = re.findall(r"y..n", "hello yuan,hello rain,hello yabn")
print(ret)  # ['yuan', 'yabn']
# With re.S (DOTALL) the wildcard . matches every character, newline included.
ret = re.findall(r"y..n", "hello yu\nn,hello rain,hello yabn", re.S)
print(ret)  # ['yu\nn', 'yabn']

# (2) Repetition: * [0, inf)   + [1, inf)   ? [0, 1]   {m,n} explicit range
# Key point: quantifiers are greedy by default.
# Appending ? to a quantifier turns greedy matching off (lazy matching).
ret = re.findall(r"\d", "1,2,3,55,yuan,33")
print(ret)  # ['1', '2', '3', '5', '5', '3', '3']
ret = re.findall(r"hi \d{2}", "hi 1,hi 66,hi 188")
print(ret)  # ['hi 66', 'hi 18']
ret = re.findall(r"hi \d*", "hi 1,hi 66,hi 188")
print(ret)  # ['hi 1', 'hi 66', 'hi 188']
ret = re.findall(r"\d+", "66,188,2,12222,hello")
print(ret)  # ['66', '188', '2', '12222']
ret = re.findall(r"\d+?", "66,188,2,12222,hello")  # lazy: one digit at a time
print(ret)  # ['6', '6', '1', '8', '8', '2', '1', '2', '2', '2', '2']
ret = re.findall(r"hi \d?", "hi 1,hi 66,hi 188,hi ")
print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']
ret = re.findall(r"hi \d{0,1}", "hi 1,hi 66,hi 188,hi ")  # same as \d?
print(ret)  # ['hi 1', 'hi 6', 'hi 1', 'hi ']
ret = re.findall(r"hi \d{1,}", "hi 1,hi 66,hi 188,hi ")  # same as \d+
print(ret)  # ['hi 1', 'hi 66', 'hi 188']
ret = re.findall(r"hi \d{0,}", "hi 1,hi 66,hi 188,hi ")  # same as \d*
print(ret)  # ['hi 1', 'hi 66', 'hi 188', 'hi ']
ret = re.findall(r"hi \d{1,3}?", "hi 1,hi 66,hi 188,hi ")  # lazy: takes the minimum, 1
print(ret)  # ['hi 1', 'hi 6', 'hi 1']

# (3) Anchors: ^ start of string, $ end of string
ret = re.findall(r"^good/.{4}/.{4}", "hello/good/food/meat")
print(ret)  # []
ret = re.findall(r"^good/.{4}/meat$", "good/aaaa/meat")
print(ret)  # ['good/aaaa/meat']

# (4) [] character set: matches any single character listed inside [].
#     Two characters are special inside a set: - (range) and a leading ^ (negation).
ret = re.findall(r"yu[ac]n", "yuan yubn yucn yuacn")
print(ret)  # ['yuan', 'yucn']
ret = re.findall(r"yu[a,c]n", "yuan yubn yucn yu,n")  # the comma is a set member too
print(ret)  # ['yuan', 'yucn', 'yu,n']
ret = re.findall(r"yu[0123456789]n", "yuan yu8n yucn yu2n")
print(ret)  # ['yu8n', 'yu2n']
ret = re.findall(r"yu[0-9]n", "yuan yu8n yucn yu2n")
print(ret)  # ['yu8n', 'yu2n']
ret = re.findall(r"[a-z0-9A-Z]+", "yuan,22,alvin,45,rain")
print(ret)  # ['yuan', '22', 'alvin', '45', 'rain']
ret = re.findall(r"[^0-9]+", "yuan,22,alvin,45,rain")  # negated set
print(ret)  # ['yuan,', ',alvin,', ',rain']

# (5) Grouping () and alternation |
# (?:...) makes the group non-capturing, so findall returns the whole match
# instead of only the group's text.
ret = re.findall(
    r"https?://www\.[a-zA-Z0-9]+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']

# (6) Escape character \
# Gives some ordinary characters a special meaning: for ASCII text \d is
# like [0-9] and \w is like [0-9a-zA-Z_].
# Removes the special meaning of metacharacters: \. matches a literal dot.
ret = re.findall(r"\d+", "123a45bcd678")
print(ret)  # ['123', '45', '678']

# (7) () capture groups
# 1. re.findall(): with one capture group, returns the group's text per match.
ret1 = re.findall(
    r"https?://www\.\w+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret1)  # ['http://www.baidu.com', 'https://www.jd.com', 'http://www.python.cn']
ret2 = re.findall(
    r"(https?)://www\.\w+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret2)  # ['http', 'https', 'http']
ret3 = re.findall(
    r"https?://www\.(\w+)\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret3)  # ['baidu', 'jd', 'python']

# 2. re.search(): finds the first match only and returns a Match object.
ret4 = re.search(
    r"https?://www\.\w+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret4)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
print(ret4.group())  # http://www.baidu.com
# (?P<name>...) names a group so it can be read back with .group("name").
ret5 = re.search(
    r"https?://www\.(?P<mingzi>\w+)\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret5)  # <re.Match object; span=(0, 20), match='http://www.baidu.com'>
print(ret5.group("mingzi"))  # baidu
# Note: the group named "yuming" below captures the URL scheme (http/https).
ret6 = re.search(
    r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret6.group("yuming"))  # http
ret7 = re.search(
    r"(?P<yuming>https?)://www\.(?P<mingzi>\w+)\.(?P<houzhui>com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret7.group("houzhui"))  # com

# 3. re.match(): only matches at the very start of the string.
ret8 = re.match(
    r"https?://www\.\w+\.(?:com|cn)",
    "http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
print(ret8.group())  # http://www.baidu.com
ret9 = re.match(
    r"https?://www\.\w+\.(?:com|cn)",
    "huchangxi,http://www.baidu.com,https://www.jd.com,http://www.python.cn,",
)
# The string does not start with a URL, so match() returns None; calling
# ret9.group() here would raise AttributeError, so print the result itself.
print(ret9)  # None
'''