import re


def match_sxz(noun):
    """Return a match object if *noun* ends in 's', 'x' or 'z', else None."""
    return re.search(r'[sxz]$', noun)


def apply_sxz(noun):
    """Return *noun* with 'es' appended."""
    return re.sub(r'$', 'es', noun)


def match_h(noun):
    """Return a match object if *noun* ends in 'h' preceded by a character
    other than a, e, i, o, u, d, g, k, p, r or t, else None.
    """
    # The trailing '$' is essential: without it an 'h' anywhere in the word
    # (e.g. 'chair') would trigger the 'es' rule.
    return re.search(r'[^aeioudgkprt]h$', noun)


def apply_h(noun):
    """Return *noun* with 'es' appended."""
    return re.sub(r'$', 'es', noun)


def match_y(noun):
    """Return a match object if *noun* ends in a non-vowel followed by 'y'."""
    return re.search(r'[^aeiou]y$', noun)


def apply_y(noun):
    """Return *noun* with its final 'y' replaced by 'ies'."""
    return re.sub(r'y$', 'ies', noun)


def match_default(noun):
    """Fallback rule: matches every noun."""
    return True


def apply_default(noun):
    """Return *noun* with 's' appended."""
    return re.sub(r'$', 's', noun)


# Ordered (match, apply) pairs; the first rule whose match function returns
# a truthy value wins, so the catch-all default must stay last.
rules = (
    (match_sxz, apply_sxz),
    (match_h, apply_h),
    (match_y, apply_y),
    (match_default, apply_default),
)


def plural(noun):
    """Return the plural of *noun* using the first matching rule in `rules`."""
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)


if __name__ == '__main__':
    print(plural('body'))
改成动态创建函数的方式
import re


def build_match_apply_function(pattern, search, replace):
    """Build a (match, apply) pair of closures for one pluralization rule.

    pattern: regex that decides whether the rule applies to a word.
    search, replace: arguments passed to re.sub() to produce the plural.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)


# (pattern, search, replace) triples, tried in order; the last one matches
# every word and acts as the default rule.
patterns = (
    (r'[sxz]$', r'$', 'es'),
    (r'[^aeioudgkprt]h$', r'$', 'es'),
    # The group must close before 'y$'. With '(qu|[^aeiou]y$)' any word that
    # merely contains "qu" matched and came back unchanged.
    (r'(qu|[^aeiou])y$', r'y$', 'ies'),
    (r'$', r'$', 's'),
)

rules = [build_match_apply_function(pattern, search, replace)
         for (pattern, search, replace) in patterns]


def plural(noun):
    """Return the plural of *noun* using the first matching rule in `rules`."""
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)


if __name__ == '__main__':
    print(plural('body'))
下面把规则存放到一个文件中，通过读取文件内容的方式来完成。
把规则写到plural.txt中
[sxz]$ $ es
[^aeioudgkprt]h$ $ es
[^aeiou]y$ y$ ies
$ $ s
程序代码
import re


def build_match_apply_function(pattern, search, replace):
    """Build a (match, apply) pair of closures for one pluralization rule.

    pattern: regex that decides whether the rule applies to a word.
    search, replace: arguments passed to re.sub() to produce the plural.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)


# Rules are loaded once, at import time, from 'plural.txt': one rule per
# line, three whitespace-separated fields (pattern, search, replace).
rules = []
with open('plural.txt', encoding='gb2312') as pattern_file:
    for line in pattern_file:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing empty line) carries no rule;
            # unpacking it would raise ValueError.
            continue
        # A line that does not have exactly three fields raises ValueError.
        pattern, search, replace = fields
        rules.append(build_match_apply_function(pattern, search, replace))


def plural(noun):
    """Return the plural of *noun* using the first matching rule in `rules`.

    Returns None when no rule in the file matches.
    """
    for match_rule, apply_rule in rules:
        if match_rule(noun):
            return apply_rule(noun)
    return None


if __name__ == '__main__':
    print(plural('body'))
继续改进,用生成器
import re


def build_match_apply_function(pattern, search, replace):
    """Build a (match, apply) pair of closures for one pluralization rule.

    pattern: regex that decides whether the rule applies to a word.
    search, replace: arguments passed to re.sub() to produce the plural.
    """
    def match_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return (match_rule, apply_rule)


def rules(rule_filename):
    """Yield (match, apply) pairs read lazily from *rule_filename*.

    Each non-blank line holds three whitespace-separated fields: pattern,
    search and replace. A non-blank line with a different number of fields
    raises ValueError.
    """
    with open(rule_filename, encoding='gb2312') as pattern_file:
        for line in pattern_file:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no rule.
                continue
            pattern, search, replace = fields
            yield build_match_apply_function(pattern, search, replace)


def plural(noun, rule_filename='plural.txt'):
    """Return the plural of *noun* using the first matching rule in the file.

    Raises ValueError when no rule in *rule_filename* matches.
    """
    rule_iter = rules(rule_filename)
    try:
        for match_rule, apply_rule in rule_iter:
            if match_rule(noun):
                return apply_rule(noun)
    finally:
        # Returning early leaves the generator suspended inside its `with`
        # block; closing it releases the file handle immediately instead of
        # waiting for garbage collection.
        rule_iter.close()
    raise ValueError('no matching rule for {0}'.format(noun))


if __name__ == '__main__':
    print(plural('body'))
改用迭代器读取文件,并缓存
import re


def build_match_and_apply_functions(pattern, search, replace):
    """Build a [match, apply] pair of closures for one pluralization rule.

    pattern: regex that decides whether the rule applies to a word.
    search, replace: arguments passed to re.sub() to produce the plural.
    """
    def matches_rule(word):
        return re.search(pattern, word)

    def apply_rule(word):
        return re.sub(search, replace, word)

    return [matches_rule, apply_rule]


class LazyRules:
    """Iterator over pluralization rules, read on demand and cached.

    Rules are read one line at a time from `rules_filename`; every rule that
    has been read is kept in `cache`, so later iterations replay the cache
    before touching the file again. The instance is its own iterator
    (`__iter__` returns self and resets the position), so only one iteration
    can be in progress at a time.
    """

    rules_filename = 'plural.txt'

    def __init__(self):
        # The file is opened on first use rather than here, so creating the
        # object does no I/O.
        self.pattern_file = None
        self.cache = []
        # Initialised here so next() works even before iter() is called.
        self.cache_index = 0

    def __iter__(self):
        self.cache_index = 0
        return self

    def __next__(self):
        self.cache_index += 1
        if len(self.cache) >= self.cache_index:
            return self.cache[self.cache_index - 1]

        if self.pattern_file is None:
            self.pattern_file = open(self.rules_filename, encoding='gb2312')
        if self.pattern_file.closed:
            # The file was fully consumed earlier; nothing left beyond cache.
            raise StopIteration

        while True:
            line = self.pattern_file.readline()
            if not line:
                self.pattern_file.close()
                raise StopIteration
            fields = line.split()
            if fields:
                break
            # Blank line: carries no rule, keep reading.

        # A non-blank line without exactly three fields raises ValueError.
        pattern, search, replace = fields
        funcs = build_match_and_apply_functions(pattern, search, replace)
        self.cache.append(funcs)
        return funcs


rules = LazyRules()


def plural(noun):
    """Return the plural of *noun* using the first matching rule in `rules`.

    Returns None when no rule matches.
    """
    for matches_rule, apply_rule in rules:
        if matches_rule(noun):
            return apply_rule(noun)
    return None


if __name__ == '__main__':
    print(plural('bony'))