package com.taidi.nlp.cn.bot.utils;

import java.text.NumberFormat;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts Chinese numerals (optionally mixed with ASCII digits) to numbers.
 *
 * <p>Supported forms include plain numerals ("一百二十三"), decimals written
 * with 点 ("十三点五"), colloquial shorthand where the trailing unit is
 * omitted ("一万六" = 16000) and "half" quantities ("十一个半" = 11.5).
 * Input that cannot be interpreted yields {@code 0} rather than an exception.
 *
 * @author duanyu
 */
public class ChineseNumFormat {

    private static final String ZERO = "零";
    private static final String TEN = "十";
    private static final String HUNDRED = "百";
    private static final String THOUSAND = "千";
    private static final String WAN = "万";
    private static final String YI = "亿";
    private static final String POINT = "点";
    private static final String HALF = "半";
    private static final String HALF_WITH_MEASURE = "半个";

    /** Chinese digits ordered so that the index of a character is its value. */
    private static final String CHINESE_DIGITS = "零一二三四五六七八九";

    /** Splits a numeral into its 亿 / 万 / 千 / 百 / 十 / ones sections. */
    private static final Pattern CHINESE_NUM_PATTERN = Pattern.compile(
            "(?<yi>[零点一两二三四五六七八九十百千万\\d]+亿)?"
                    + "(?<wan>[零点一两二三四五六七八九十百千\\d]+万)?"
                    + "(?<thousand>[零点一两二三四五六七八九\\d]+千)?"
                    + "(?<hundred>[零点一两二三四五六七八九\\d]+百)?"
                    + "(?<ten>[零点一两二三四五六七八九\\d]*十)?"
                    + "(?<one>[零点一两二三四五六七八九\\d]+)?");

    /**
     * A coefficient followed by one unit character. ASCII digits are accepted
     * so that it agrees with {@link #CHINESE_NUM_PATTERN} (e.g. "3万").
     */
    private static final Pattern NUM_UNIT_PATTERN =
            Pattern.compile("(?<num>[零点一两二三四五六七八九十百千万\\d]+)[亿万千百十]");

    /**
     * A well-formed ASCII decimal: "12", "12.", "12.5" or ".5". Strings such
     * as "." or "1.2.3" are rejected so they never reach
     * {@link Double#parseDouble(String)}.
     */
    private static final Pattern NUM_PATTERN = Pattern.compile("\\d+\\.?\\d*|\\.\\d+");

    /** Up to three numeral characters, optionally followed by 个 and/or 半. */
    private static final Pattern HALF_NUM_PATTERN =
            Pattern.compile("(?<num>[一二三四五六七八九十两\\d]{1,3})(个)?(半)?");

    /** Named groups of {@link #CHINESE_NUM_PATTERN}. */
    private enum Unit {
        YI("yi"), WAN("wan"), THOUSAND("thousand"), HUNDRED("hundred"), TEN("ten"), ONE("one");

        private final String name;

        Unit(String name) {
            this.name = name;
        }

        public String getName() {
            return name;
        }
    }

    public static void main(String[] args) {
        NumberFormat nf = NumberFormat.getInstance();
        System.out.println(nf.format(chineseToNumber("十三点五")));
        System.out.println(halfNumHandle("十一个半"));
    }

    /**
     * Converts a Chinese (or ASCII) numeral string to a number.
     *
     * @param numStr the text to convert, e.g. "一百二十三", "十三点五", "3万5"
     * @return the numeric value, or {@code 0} when {@code numStr} is
     *         {@code null}, empty or not a recognised numeral
     */
    public static double chineseToNumber(String numStr) {
        if (numStr == null || numStr.isEmpty()) {
            return 0;
        }
        // Plain ASCII number: parse directly.
        if (NUM_PATTERN.matcher(numStr).matches()) {
            return Double.parseDouble(numStr);
        }
        Matcher sections = CHINESE_NUM_PATTERN.matcher(numStr);
        if (!sections.matches()) {
            return 0;
        }

        String yi = sections.group(Unit.YI.getName());
        String wan = sections.group(Unit.WAN.getName());
        String thousand = sections.group(Unit.THOUSAND.getName());
        String hundred = sections.group(Unit.HUNDRED.getName());
        String ten = sections.group(Unit.TEN.getName());
        String one = sections.group(Unit.ONE.getName());

        // A bare "十" (or "零十") carries an implicit coefficient of one.
        if (TEN.equals(ten) || (ZERO + TEN).equals(ten)) {
            ten = "一" + TEN;
        }

        double total = unitValue(yi, YI, 100000000d)
                + unitValue(wan, WAN, 10000d)
                + unitValue(thousand, THOUSAND, 1000d)
                + unitValue(hundred, HUNDRED, 100d)
                + unitValue(ten, TEN, 10d);

        // Colloquial shorthand ("一万六" = 16000): a single trailing non-zero
        // digit takes the place value just below the lowest explicit unit.
        double shorthandScale = 0;
        if (hundred != null) {
            shorthandScale = 10d;
        } else if (thousand != null) {
            shorthandScale = 100d;
        } else if (wan != null) {
            shorthandScale = 1000d;
        } else if (yi != null) {
            shorthandScale = 10000000d;
        }
        int trailingDigit = (one != null && one.length() == 1) ? digitValue(one.charAt(0)) : -1;
        if (ten == null && shorthandScale > 0 && trailingDigit > 0) {
            return total + trailingDigit * shorthandScale;
        }
        return total + onesValue(one);
    }

    /**
     * Evaluates one "coefficient + unit" section such as "三千" or "二十万".
     *
     * @param part       the section text, or {@code null} when absent
     * @param unit       the unit character the section must end with
     * @param multiplier the numeric weight of {@code unit}
     * @return coefficient times {@code multiplier}, or {@code 0} when the
     *         section is absent or malformed
     */
    private static double unitValue(String part, String unit, double multiplier) {
        if (part == null) {
            return 0;
        }
        Matcher matcher = NUM_UNIT_PATTERN.matcher(part);
        if (matcher.matches() && part.endsWith(unit)) {
            return chineseToNumber(matcher.group("num")) * multiplier;
        }
        return 0;
    }

    /**
     * Evaluates the ones section: a single digit, a run of ASCII digits, or a
     * decimal written with 点. One leading 零 (as in "一百零五") is skipped.
     * A run of several Chinese digits without 点 is not supported and yields 0.
     *
     * @param ones the ones section, or {@code null} when absent
     * @return the value of the section, {@code 0} when absent or unsupported
     */
    private static double onesValue(String ones) {
        if (ones == null) {
            return 0;
        }
        String text = ones.startsWith(ZERO) ? ones.substring(1) : ones;
        if (text.isEmpty()) {
            return 0;
        }
        if (text.contains(POINT)) {
            // Limit -1 keeps trailing empty strings, so "三点" yields ["三", ""].
            String[] parts = text.split(POINT, -1);
            long whole = (long) chineseToNumber(parts[0]);
            // Keep the fraction as text so leading zeros survive ("零五" -> "05").
            String fraction = toArabicDigits(parts[1]);
            return Double.parseDouble(whole + "." + (fraction.isEmpty() ? "0" : fraction));
        }
        if (NUM_PATTERN.matcher(text).matches()) {
            return Double.parseDouble(text);
        }
        if (text.length() == 1) {
            return Math.max(digitValue(text.charAt(0)), 0);
        }
        return 0;
    }

    /**
     * Converts a run of digits read one by one (no 十/百/千 units) to an int,
     * e.g. "一二三" gives 123. Both Chinese and ASCII digits are accepted.
     *
     * @param num the digit string
     * @return the parsed value, or {@code 0} when {@code num} is null or empty
     * @throws NumberFormatException if the converted text is not a valid int
     */
    public static int onlyStringNumToInt(String num) {
        if (num == null || num.isEmpty()) {
            return 0;
        }
        return Integer.parseInt(toArabicDigits(num));
    }

    /** Replaces every Chinese digit with its ASCII digit; other characters are kept. */
    private static String toArabicDigits(String text) {
        StringBuilder converted = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            int value = digitValue(c);
            converted.append(value >= 0 ? (char) ('0' + value) : c);
        }
        return converted.toString();
    }

    /**
     * Returns the value (0-9) of a Chinese or ASCII digit character.
     *
     * @param c the character to inspect
     * @return the digit value, or {@code -1} when {@code c} is not a digit
     */
    private static int digitValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c == '两') {
            return 2;
        }
        return CHINESE_DIGITS.indexOf(c);
    }

    /**
     * Converts a quantity that may carry a "half" suffix, e.g. "半个" gives 0.5
     * and "十一个半" gives 11.5. The first numeral found anywhere in the text
     * is used, and 0.5 is added when the text contains 半.
     *
     * @param halfNum the text to convert
     * @return the numeric value, or {@code 0} when {@code halfNum} is
     *         {@code null}, empty or contains no numeral
     */
    public static double halfNumHandle(String halfNum) {
        if (halfNum == null || halfNum.isEmpty()) {
            return 0;
        }
        if (NUM_PATTERN.matcher(halfNum).matches()) {
            return Double.parseDouble(halfNum);
        }
        if (HALF.equals(halfNum) || HALF_WITH_MEASURE.equals(halfNum)) {
            return 0.5;
        }
        Matcher matcher = HALF_NUM_PATTERN.matcher(halfNum);
        if (!matcher.find()) {
            return 0;
        }
        String numeral = matcher.group("num");
        double base = NUM_PATTERN.matcher(numeral).matches()
                ? Double.parseDouble(numeral)
                : (long) chineseToNumber(numeral);
        return halfNum.contains(HALF) ? base + 0.5 : base;
    }
}