KMP:判断两个字符串之间是否匹配
Levenshtein:两个字符串之间的编辑(转换)距离
先贴代码,再补别的
/**
 * Utility for computing the Levenshtein (edit) distance between two strings
 * and a similarity score derived from it.
 *
 * @author liuxs
 */
public class LevenshteinDistanceUtil {

    /**
     * Computes the Levenshtein distance between two strings: the minimum number
     * of single-character insertions, deletions and substitutions needed to turn
     * {@code s1} into {@code s2}. Characters are compared as UTF-16 code units.
     *
     * @param s1 first string, must not be {@code null}
     * @param s2 second string, must not be {@code null}
     * @return the Levenshtein distance (0 when the strings are equal)
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int getStringDistance(String s1, String s2) {
        int len1 = s1.length();
        int len2 = s2.length();
        // Against an empty string the distance is simply the other string's length.
        if (len1 == 0) {
            return len2;
        }
        if (len2 == 0) {
            return len1;
        }

        // Only the previous and the current row of the distance table are ever
        // read, so two rolling rows replace the full (len1+1) x (len2+1) matrix.
        int[] previous = new int[len2 + 1];
        int[] current = new int[len2 + 1];
        for (int j = 0; j <= len2; j++) {
            previous[j] = j; // row 0: build s2's prefix from nothing
        }

        for (int i = 1; i <= len1; i++) {
            char c1 = s1.charAt(i - 1);
            current[0] = i; // column 0: delete the first i chars of s1
            for (int j = 1; j <= len2; j++) {
                if (c1 == s2.charAt(j - 1)) {
                    // Same character: no extra cost, carry the diagonal value.
                    current[j] = previous[j - 1];
                } else {
                    // Cheapest of delete (up), insert (left), substitute (diagonal), plus 1.
                    current[j] = getMin(previous[j], current[j - 1], previous[j - 1]) + 1;
                }
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        // After the final swap the last computed row lives in 'previous'.
        return previous[len2];
    }

    /** Returns the smallest of three ints. */
    private static int getMin(int a, int b, int c) {
        return Math.min(a, Math.min(b, c));
    }

    /**
     * Computes a similarity score in the range [0, 1], rounded to two decimals:
     * {@code 1 - distance / max(length(s1), length(s2))}.
     * Two empty strings are identical and therefore score {@code 1.0}.
     *
     * @param s1 first string, must not be {@code null}
     * @param s2 second string, must not be {@code null}
     * @return similarity score; 1.0 means equal, 0.0 means nothing in common
     * @throws NullPointerException if either argument is {@code null}
     */
    public static float calculateProximity(String s1, String s2) {
        int longest = Math.max(s1.length(), s2.length());
        if (longest == 0) {
            // Avoids 0f / 0 = NaN, which Math.round would turn into a score of 0.
            return 1.0f;
        }
        float editDistance = getStringDistance(s1, s2);
        float proximity = 1 - editDistance / longest;
        return (float) Math.round(proximity * 100) / 100;
    }
}
/**
 * String matching helpers: an exact substring search and an in-order
 * (subsequence) character match used for fuzzy lookups.
 *
 * @author liuxs
 */
public class KMPMatchUtil {

    /**
     * Horspool bad-character shift: how far the search window may slide when
     * {@code c} is the text character aligned with the LAST pattern position.
     *
     * @param c       text character under the last pattern position
     * @param pattern pattern characters (length &gt;= 1)
     * @return distance from the rightmost occurrence of {@code c} in
     *         {@code pattern[0 .. n-2]} to the pattern end, or the full pattern
     *         length when {@code c} does not occur there; always &gt;= 1
     */
    private static int dist(char c, char[] pattern) {
        int n = pattern.length;
        // The last pattern position is deliberately excluded: including it would
        // yield a shift of 0 and the search would never advance.
        for (int k = n - 2; k >= 0; k--) {
            if (pattern[k] == c) {
                return n - 1 - k;
            }
        }
        return n;
    }

    /**
     * Finds the first occurrence of {@code p_t} inside {@code p_s}.
     *
     * @param p_s text to search in
     * @param p_t pattern to search for
     * @return -2 if either argument is {@code null}; -1 if the pattern does not
     *         occur; otherwise the 0-based start index of the first occurrence
     *         (0 for an empty pattern)
     */
    public static int index(final String p_s, final String p_t) {
        if (p_s == null || p_t == null) {
            return -2;
        }
        char[] s = p_s.toCharArray();
        char[] t = p_t.toCharArray();
        int slen = s.length;
        int tlen = t.length;
        if (slen < tlen) {
            return -1;
        }
        if (tlen == 0) {
            return 0; // the empty pattern matches at the very start
        }

        // 'end' is the exclusive end of the current window s[end - tlen .. end - 1].
        int end = tlen;
        while (end <= slen) {
            // Compare the window against the pattern from right to left using
            // separate cursors, so 'end' itself is never moved backwards.
            int si = end - 1;
            int ti = tlen - 1;
            while (ti >= 0 && s[si] == t[ti]) {
                si--;
                ti--;
            }
            if (ti < 0) {
                return si + 1; // whole pattern matched; si stopped one before the start
            }
            // Slide by the shift of the text char under the last pattern position.
            end += dist(s[end - 1], t);
        }
        return -1;
    }

    /**
     * Tests whether every character of {@code target} appears in {@code source}
     * in the same order, not necessarily contiguously (i.e. {@code target} is a
     * subsequence of {@code source}). Despite the method name this is not a
     * KMP substring search.
     *
     * @param source string to search in
     * @param target characters to look for, in order
     * @return {@code false} if either argument is {@code null} or blank after
     *         trimming; otherwise whether {@code target} is a subsequence of
     *         {@code source} (whitespace inside the strings is compared literally)
     */
    public static boolean kmpMatch(String source, String target) {
        if (source == null || target == null
                || source.trim().isEmpty() || target.trim().isEmpty()) {
            return false;
        }
        int sourceLength = source.length();
        int targetLength = target.length();
        int matched = 0; // number of leading target chars found so far
        for (int pos = 0; pos < sourceLength && matched < targetLength; pos++) {
            if (source.charAt(pos) == target.charAt(matched)) {
                matched++;
            }
        }
        return matched == targetLength;
    }
}
/** * @Author: liuxs * @Description: 模糊搜索支行信息 * @Date: Create in 11:38 2018/7/26. */ @Service public class FuzzyBankBranchService { private final static Logger logger = LoggerFactory.getLogger(FuzzyBankBranchService.class); @Autowired private BankBranchService bankBranchService; public List<BankBranch> fuzzyFindByBranchName(QueryBankBean queryBankBean) { String fuzzyWord = queryBankBean.getBranchName(); List<BankBranch> allBranches = bankBranchService.findAll(); List<String> resultStr = new ArrayList<>(); List<String> relateCodes = new ArrayList<>(); for (BankBranch branch : allBranches) { String code = branch.getRelateCode(); String branchName = branch.getBranchName(); if ((KMPMatchUtil.kmpMatch(branchName, fuzzyWord)/* || KMPMatchUtil.kmpMatch(fuzzyWord, branchName)*/) && !relateCodes.contains(code)) { resultStr.add(branch.objectToString(branch)); } } if (CollectionUtils.isNotEmpty(resultStr)) { logger.info("通过支行名称:{},匹配到{}记录。", fuzzyWord, resultStr.size()); Collections.sort(resultStr, new Comparator<String>() { public int compare(String s1, String s2) { return LevenshteinDistanceUtil.getStringDistance(s1.split(",")[2], fuzzyWord) - LevenshteinDistanceUtil.getStringDistance(s2.split(",")[2], fuzzyWord); } }); //若匹配数大于15条,则返回相似度较高的15条 if (resultStr.size() > 15) { resultStr = resultStr.subList(0, 15); } } return convertBranch(resultStr, fuzzyWord); } /** * 整合返回信息,标记相似度 * @param resultStr * @param fuzzyWord * @return */ private List<BankBranch> convertBranch(List<String> resultStr, String fuzzyWord) { List<BankBranch> results = new ArrayList<>(); for (String str : resultStr) { BankBranch branch = stringToObject(str); branch.setProximity(LevenshteinDistanceUtil.calculateProximity(branch.getBranchName(), fuzzyWord)); results.add(branch); } return results; } private BankBranch stringToObject(String str) { BankBranch bankBranch = new BankBranch(); bankBranch.setRelateCode(str.split(",")[0]); bankBranch.setBankName(str.split(",")[1]); 
bankBranch.setBranchName(str.split(",")[2]); bankBranch.setProvinceName(str.split(",")[3]); bankBranch.setCityName(str.split(",")[4]); return bankBranch; } }