#coding:utf-8
#/usr/bin/python
"""Walk-through of the core ``re`` module functions on one sample string.

2018-11-25
dinghanhua

Each section below exercises one API (match, search, sub, compile, findall,
finditer, split) against ``teststr`` and prints the result.
"""

import re

# Sample input: a JSON-like fragment (without the enclosing braces).
teststr = (
    '"id":"2994925","publisher":"Yahoo Press",'
    '"isbn10":"0596517742","isbn13":"9780596517748",'
    '"title":"JavaScript","url":"https://api.douban.com/v2/book/2994925",'
    '"alt_title":"","author_intro":"Douglas Crockford is a Senior JavaScript '
    'Architect at Yahoo!. He is the maintainer of the JSON format, and a '
    'regular speaker at conferences on advanced JavaScript topic. He is also '
    'on the JavaScript 2.0 committee at ECMA."'
)

# --- re.match(): match only at the start of the string ----------------------
pattern = r'\d+'
# Prints None: teststr starts with a double quote, not a digit.
print(re.match(pattern, teststr))

pattern = r'"id":"(.*)","publisher'
matchobj = re.match(pattern, teststr)
print(matchobj.group(0))   # the whole matched text
print(matchobj.groups())   # tuple of all captured groups
print(matchobj.group(1))   # first captured group
print(matchobj.span())     # (start, end) of the whole match
print(matchobj.start(), matchobj.end())

# --- re.search(): return the first match anywhere in the string -------------
pattern = r'\d+'
print(re.search(pattern, teststr))

# Non-greedy groups so each capture stops at its own closing quote.
pattern = r'"id":"(.*?)".*"title":"(.*?)"'
matchobj = re.search(pattern, teststr)
print(matchobj.group(0))
print(matchobj.groups())
print(matchobj.group(1, 2))  # several groups at once, returned as a tuple
print(matchobj.span())
print(matchobj.start(), matchobj.end())

# --- re.sub(): replace matches ----------------------------------------------
# repl is the replacement string; count is the maximum number of
# replacements, where the default 0 means "replace all".
pattern = r'\d+'
teststr2 = re.sub(pattern, repl='1111', string=teststr, count=1)
print(teststr2)

pattern = r'\D+'
teststr2 = re.sub(pattern, "", teststr)  # remove all non-digits
print(teststr2)

# --- re.compile(): build a reusable regular-expression object ---------------
pattern = re.compile(r'"(\w+)":"(\w+)"')
matchobj = pattern.match(teststr)
print(matchobj.groups())

# Compiled patterns accept start and end positions for the scan.
matchobj = pattern.search(teststr, 10, 100)
print(matchobj.groups())

# --- findall(): match everything, return a list -----------------------------
pattern = r'"(\w+)":"(\d+)"'
matchlist = re.findall(pattern, teststr)
print(matchlist)

# NOTE: \D also matches quotes and commas, so the second group can run past
# the end of a value; the scan starts at position 10.
pattern = re.compile(r'"(\w+)":"(\D+)"')
matchlist = pattern.findall(teststr, 10)
print(matchlist)

# --- re.finditer(): match everything, return an iterator --------------------
pattern = r'"(\w+)":"(\d+)"'
matchiter = re.finditer(pattern, teststr)
print(matchiter)
for m in matchiter:
    print(m.groups())

# --- re.split(): split on a regular expression ------------------------------
pattern = r'[^a-zA-Z]+'  # split on runs of non-letters
splitlist = re.split(pattern, teststr)
print(splitlist)