• 将中文价格识别为数字的 Java 代码



    运行效果:

    
    
    public class VoicePriceRecognition {
    
        /** Message attached to the exceptions raised when no price can be extracted. */
        private final static String NOT_HAS_PRICE_CONTENT="no price";

        /** Characters accepted as a currency unit at the end of the spoken text. */
        private static final Character[] PRICE_UNIT = {'元','块','角','毛','分'};
        private static final List<Character> PRICE_UNIT_LIST = Arrays.asList(PRICE_UNIT);

        /**
         * Chinese numeral characters. Indexes 0-8 are the everyday digits one to nine and
         * indexes 9-17 the financial (capital) digits one to nine; the static initializer
         * relies on that layout. The remaining entries are magnitude characters and zero.
         */
        private static final Character[] CN_NUMERIC = { '一', '二', '三', '四', '五',
                '六', '七', '八', '九', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖',
                '十', '百', '千', '拾', '佰', '仟',
                '万', '亿',
                // '○', 'O',
                '零' };
        /** ASCII digits; the index of each entry equals its numeric value. */
        private static final Character[] EN_NUMERIC = {'0','1','2','3','4','5','6','7','8','9'};

        private static final String BASE_UNIT_YUAN = "元";
        private static final String BASE_UNIT_KUAI = "块";
        private static final String BASE_UNIT_JIAO = "角";
        private static final String BASE_UNIT_MAO = "毛";
        private static final String BASE_UNIT_FEN = "分";

        /**
         * Numeric value of every supported numeral character. ASCII digits are registered
         * here as well, so a single lookup handles both Chinese and Arabic digits.
         */
        private static final Map<Character, Integer> cnNumeric = new HashMap<Character, Integer>(40, 0.85f);
        /** Numeric value of the ASCII digits only. */
        private static final Map<Character, Integer> enNumeric = new HashMap<Character, Integer>();

        static {
            // Everyday and financial digits share the values 1..9.
            for (int value = 1; value <= 9; value++) {
                cnNumeric.put(CN_NUMERIC[value - 1], value);
                cnNumeric.put(CN_NUMERIC[value + 8], value);
            }
            cnNumeric.put('两', 2);
            cnNumeric.put('零', 0);
            cnNumeric.put('十', 10);
            cnNumeric.put('拾', 10);
            cnNumeric.put('百', 100);
            cnNumeric.put('佰', 100);
            cnNumeric.put('千', 1000);
            cnNumeric.put('仟', 1000);
            cnNumeric.put('万', 10000);
            cnNumeric.put('亿', 100000000);
            for (int i = 0; i < EN_NUMERIC.length; i++) {
                cnNumeric.put(EN_NUMERIC[i], i);
                // Previously this map was left null, so any ASCII-only lookup threw a NullPointerException.
                enNumeric.put(EN_NUMERIC[i], i);
            }
        }
        /**
         * Splits the spoken text into an item name and a price phrase, anchored on the last
         * occurrence of {@code baseStr}, then converts the phrase into a decimal amount.
         *
         * <p>Starting at the anchor unit the text is scanned backwards while the characters
         * belong to the amount: ASCII digits and '.' when the character right before the unit
         * is a digit, otherwise characters known to {@code cnNumeric} and '点'. Everything before
         * the amount is the item name. When no numeral precedes the unit, the unit character
         * itself stays part of the name and the price phrase starts at the unit.
         *
         * <p>Writes the keys {@code "type"} ("number" or "cn"), {@code "name"} and
         * {@code "price"} into {@code result}, and prints the final map to standard output.
         *
         * @param voiceContent the recognised text, expected to contain {@code baseStr}
         * @param baseStr the currency unit used as the anchor
         * @param result the map receiving the recognition result
         * @throws ServiceException if the text carries no item name or no usable price
         */
        private static void commonRecognition(String voiceContent,String baseStr, Map<String,String> result) throws ServiceException {
            int unitIndex = voiceContent.lastIndexOf(baseStr);
            if (unitIndex < 0) {
                // Without the anchor unit there is nothing to price; previously this ran into charAt(-2).
                throw new InvalidParameterException("NOT_HAS_PRICE",NOT_HAS_PRICE_CONTENT);
            }
            if (unitIndex == 0) {
                throw new InvalidParameterException("NOT_HAS_NAME", "no price");
            }

            char beforeUnit = voiceContent.charAt(unitIndex - 1);
            boolean arabic = Character.isDigit(beforeUnit);
            // The backwards scan only starts when a numeral sits directly in front of the unit.
            boolean amountPrecedesUnit = arabic || cnNumeric.containsKey(beforeUnit);

            int priceStart = unitIndex;
            if (amountPrecedesUnit) {
                while (priceStart > 0) {
                    char c = voiceContent.charAt(priceStart - 1);
                    boolean partOfAmount = arabic
                            ? (Character.isDigit(c) || c == '.')
                            : (cnNumeric.containsKey(c) || c == '点');
                    if (!partOfAmount) {
                        break;
                    }
                    priceStart--;
                }
            }
            result.put("type", arabic ? "number" : "cn");

            // The amount reaches the start of the text: no room is left for an item name.
            // This now also covers a leading '.' or '点', which used to become a one-character name.
            if (priceStart == 0) {
                throw new InvalidParameterException("NOT_HAS_NAME", "no price");
            }
            // A name consisting of a single numeral (mixed Chinese/Arabic amount) is rejected as well.
            if (amountPrecedesUnit && priceStart == 1 && cnNumeric.containsKey(voiceContent.charAt(0))) {
                throw new InvalidParameterException("NOT_HAS_NAME", "no price");
            }

            int nameEnd = amountPrecedesUnit ? priceStart : unitIndex + 1;
            result.put("name", voiceContent.substring(0, nameEnd));

            String priceContent = voiceContent.substring(priceStart);
            if (priceContent.length() < 2) {
                throw new InvalidParameterException("NOT_HAS_PRICE",NOT_HAS_PRICE_CONTENT);
            }
            result.put("price", priceContent);
            getPrice(result);
            // Kept on purpose: main() relies on this output to show the recognition result.
            System.out.println(result);
        }
    
        /**
         * Normalises the raw price phrase stored under {@code "price"} and passes the map on
         * to the semantic check.
         *
         * <p>Colloquial units are mapped to the formal ones, financial magnitude characters to
         * the everyday ones, '两' to '二', and every '零' is turned into a blank before the
         * phrase is trimmed.
         *
         * @param result the recognition map; its {@code "price"} entry is rewritten
         * @throws ServiceException if the normalised phrase is not a valid price
         */
        private static void getPrice( Map<String,String> result) throws ServiceException {
            String normalized = result.get("price");

            // Units: kuai -> yuan, mao -> jiao.
            normalized = normalized.replace("块","元");
            normalized = normalized.replace("毛","角");
            // Financial magnitude characters -> everyday ones.
            normalized = normalized.replace('佰', '百');
            normalized = normalized.replace('仟', '千');
            normalized = normalized.replace('拾', '十');
            // Zero carries no value of its own; it is blanked out.
            normalized = normalized.replace('零', ' ');
            normalized = normalized.replace("两","二");

            result.put("price", normalized.trim());
            // Verify that the phrase really has price semantics.
            checkemanticAndSetPrice(result);
        }
    
        /**
         * Rejects an integer (yuan) phrase in which one of the characters 千, 百, 十 or 零
         * occurs more than once within the same section.
         *
         * <p>Sections are delimited by 万 and 亿, so a magnitude character may legitimately
         * reappear in another section (for example 二十万零二十). Occurrences are counted
         * directly; the earlier {@code split(...).length} test missed a repeated character at
         * the very end of the phrase because {@code String.split} drops trailing empty strings.
         *
         * @param beforeYuan the phrase in front of the yuan unit; {@code null} or empty is accepted
         * @throws InvalidParameterException if a magnitude character is repeated within a section
         */
        private static void checkBeforeYuan(String  beforeYuan) throws InvalidParameterException {
            if (beforeYuan == null || beforeYuan.isEmpty()) {
                return;
            }
            int qian = 0;
            int bai = 0;
            int shi = 0;
            int ling = 0;
            for (int i = 0; i < beforeYuan.length(); i++) {
                switch (beforeYuan.charAt(i)) {
                    case '万':
                    case '亿':
                        // A new section starts; the lower magnitudes may be used again.
                        qian = 0;
                        bai = 0;
                        shi = 0;
                        ling = 0;
                        break;
                    case '千':
                        qian++;
                        break;
                    case '百':
                        bai++;
                        break;
                    case '十':
                        shi++;
                        break;
                    case '零':
                        ling++;
                        break;
                    default:
                        break;
                }
                if (qian > 1 || bai > 1 || shi > 1 || ling > 1) {
                    throw new InvalidParameterException("价格无法识别:"+beforeYuan);
                }
            }
        }
        /**
         * Validates the normalised price phrase stored under {@code "price"} and replaces it
         * with the decimal amount, rounded to two places.
         *
         * <p>Rules applied:
         * <ul>
         *   <li>each of 元, 角 and 分 may occur at most once, and in that order;</li>
         *   <li>the part before 元 is the yuan amount, optionally followed by a '点' decimal
         *       part of which the first two characters are read as jiao and fen;</li>
         *   <li>only the first character of the jiao and of the fen segment is used;</li>
         *   <li>a numeral directly after the last unit counts as one step below that unit
         *       (after 元 as jiao, after 角 as fen, ignored after 分).</li>
         * </ul>
         *
         * <p>Reads the {@code "type"} entry ("cn" or "number") to decide how the yuan amount
         * is parsed. All arithmetic is done with {@code BigDecimal}.
         *
         * @param result the recognition map; its {@code "price"} entry is overwritten
         * @throws ServiceException if the phrase does not describe a valid price
         */
        private static void checkemanticAndSetPrice( Map<String,String> result) throws ServiceException {

            String targePrice = result.get("price");

            // 1. Every unit may appear only once.
            int yuanSum = 0;
            int jiaoSum = 0;
            int fenSum = 0;
            for (char c : targePrice.toCharArray()) {
                if (c == '元') {
                    yuanSum++;
                } else if (c == '角') {
                    jiaoSum++;
                } else if (c == '分') {
                    fenSum++;
                }
            }
            if (yuanSum > 1 || jiaoSum > 1 || fenSum > 1) {
                throw new InvalidParameterException("价格无法识别:"+targePrice);
            }

            // 2. Units must be ordered from large to small: yuan, jiao, fen.
            int yuanIndex = targePrice.lastIndexOf('元');
            int jiaoIndex = targePrice.lastIndexOf('角');
            int fenIndex = targePrice.lastIndexOf('分');
            boolean outOfOrder = (jiaoIndex > -1 && yuanIndex > jiaoIndex)
                    || (fenIndex > -1 && (jiaoIndex > fenIndex || yuanIndex > fenIndex));
            if (outOfOrder) {
                throw new InvalidParameterException("价格无法识别:"+targePrice);
            }

            // 3. Cut the phrase into its segments; lastIndex tracks the last unit seen.
            int lastIndex = 0;
            String beforeYuan = null;
            String afterCnDian = null;
            if (yuanIndex > -1) {
                beforeYuan = targePrice.substring(0, yuanIndex);
                int dianIndex = beforeYuan.lastIndexOf('点');
                if (dianIndex > -1) {
                    afterCnDian = beforeYuan.substring(dianIndex + 1);
                    beforeYuan = beforeYuan.substring(0, dianIndex);
                }
                lastIndex = yuanIndex;
            }
            // The yuan part must not repeat a magnitude character.
            checkBeforeYuan(beforeYuan);

            String betweenYuanAndJiao = null;
            if (jiaoIndex > -1) {
                betweenYuanAndJiao = targePrice.substring(yuanIndex + 1, jiaoIndex).trim();
                lastIndex = jiaoIndex;
            }
            String betweenJiaoAndFen = null;
            if (fenIndex > -1) {
                int fenStart = jiaoIndex > -1 ? jiaoIndex + 1 : yuanIndex + 1;
                betweenJiaoAndFen = targePrice.substring(fenStart, fenIndex).trim();
                lastIndex = fenIndex;
            }

            final BigDecimal tenth = new BigDecimal("0.1");
            final BigDecimal hundredth = new BigDecimal("0.01");

            // 4. Yuan amount.
            BigDecimal total = BigDecimal.ZERO;
            if (yuanIndex > -1) {
                String type = result.get("type");
                if ("cn".equals(type)) {
                    int yuan = cnNumericToArabic(beforeYuan);
                    if (yuan < 0) {
                        // The converter signals an unrecognised character with a negative value.
                        throw new InvalidParameterException("价格解析错误,无效价格");
                    }
                    total = BigDecimal.valueOf(yuan);
                } else if ("number".equals(type)) {
                    try {
                        // Parsed as a decimal directly so long inputs keep their exact value.
                        total = new BigDecimal(beforeYuan);
                    } catch (NumberFormatException e) {
                        throw new InvalidParameterException("价格解析错误,无效价格");
                    }
                }
            }

            // 5. Jiao and fen segments: only the leading character counts.
            if (betweenYuanAndJiao != null && !betweenYuanAndJiao.isEmpty()) {
                int jiao = isCNNumeric(betweenYuanAndJiao.charAt(0));
                if (jiao > -1) {
                    total = total.add(tenth.multiply(BigDecimal.valueOf(jiao)));
                }
            }
            // The emptiness test now looks at the fen segment itself; it used to test the
            // jiao segment, so an empty fen segment ran into charAt(0).
            if (betweenJiaoAndFen != null && !betweenJiaoAndFen.isEmpty()) {
                int fen = isCNNumeric(betweenJiaoAndFen.charAt(0));
                if (fen > -1) {
                    total = total.add(hundredth.multiply(BigDecimal.valueOf(fen)));
                }
            }

            // 6. A numeral without a unit after the last unit.
            if (lastIndex + 1 < targePrice.length()) {
                int trailing = isCNNumeric(targePrice.charAt(lastIndex + 1));
                if (trailing > -1) {
                    char lastUnit = targePrice.charAt(lastIndex);
                    if (lastUnit == '元') {
                        total = total.add(tenth.multiply(BigDecimal.valueOf(trailing)));
                    } else if (lastUnit == '角') {
                        total = total.add(hundredth.multiply(BigDecimal.valueOf(trailing)));
                    }
                    // Nothing smaller than fen exists, so a numeral after 分 is ignored.
                }
            }

            // 7. Decimal part after '点': first character is jiao, second is fen.
            // A blank (a former 零) or any other non-numeral counts as 0 instead of -1.
            if (afterCnDian != null && !afterCnDian.isEmpty()) {
                int jiao = Math.max(0, isCNNumeric(afterCnDian.charAt(0)));
                total = total.add(tenth.multiply(BigDecimal.valueOf(jiao)));
                if (afterCnDian.length() > 1) {
                    int fen = Math.max(0, isCNNumeric(afterCnDian.charAt(1)));
                    total = total.add(hundredth.multiply(BigDecimal.valueOf(fen)));
                }
            }

            total = total.setScale(2, java.math.RoundingMode.HALF_DOWN);
            result.put("price", String.valueOf(total));
        }
        /**
         * Converts a Chinese numeral phrase into its integer value.
         *
         * <p>The phrase is cut at the last occurrence of each magnitude character, from 亿
         * down to 百 and finally 十. The part in front of the cut is converted recursively and
         * multiplied by the magnitude. A single digit 1-9 left behind a magnitude is read as
         * the colloquial shorthand for the next lower magnitude (一百五 is 150). Anything
         * else is left for the lower magnitudes, so 一百十 yields 110; the previous
         * {@code <= 10} test multiplied the 十 itself and produced 200. Of whatever remains at
         * the end only the first character is used (三三三 is read as 3).
         *
         * <p>The result is an {@code int}; values beyond its range are not detected here.
         *
         * @param cnn the phrase to convert; surrounding blanks are ignored
         * @return the numeric value, or the result of {@code isCNNumeric} for a one-character phrase
         */
        private static int cnNumericToArabic(String cnn) {

            cnn = cnn.trim();
            if (cnn.length() == 1) {
                return isCNNumeric(cnn.charAt(0));
            }

            final char[] magnitudes = {'亿', '万', '千', '百'};
            final int[] weights = {100000000, 10000, 1000, 100};

            int val = 0;
            for (int m = 0; m < magnitudes.length; m++) {
                int cut = cnn.lastIndexOf(magnitudes[m]);
                if (cut < 0) {
                    continue;
                }
                val += cnNumericToArabic(cnn.substring(0, cut)) * weights[m];
                cnn = cnn.substring(cut + 1);

                // Colloquial shorthand: one plain digit directly after the magnitude.
                if (cnn.length() == 1) {
                    int digit = isCNNumeric(cnn.charAt(0));
                    if (digit >= 1 && digit <= 9) {
                        val += digit * (weights[m] / 10);
                        cnn = "";
                    }
                }
            }

            int shi = cnn.lastIndexOf('十');
            if (shi > -1) {
                // A leading 十 stands for ten on its own.
                int tens = shi == 0 ? 1 : cnNumericToArabic(cnn.substring(0, shi));
                val += tens * 10;
                cnn = cnn.substring(shi + 1);
            }

            // Units: only the first remaining character counts; unknown characters add nothing.
            cnn = cnn.trim();
            if (!cnn.isEmpty()) {
                int digit = isCNNumeric(cnn.charAt(0));
                if (digit > 0) {
                    val += digit;
                }
            }

            return val;
        }
        /**
         * Looks up the numeric value of a numeral character in {@code cnNumeric}.
         *
         * @param c the character to look up
         * @return the mapped value, or -1 when the character has no entry
         */
        private static int isCNNumeric(char c) {
            Integer mapped = cnNumeric.get(c);
            return mapped != null ? mapped.intValue() : -1;
        }
    
        /**
         * Looks up the numeric value of a character in {@code enNumeric}.
         *
         * <p>Throws a {@code NullPointerException} if {@code enNumeric} has not been populated.
         *
         * @param c the character to look up
         * @return the mapped value, or -1 when the character has no entry
         */
        private static int isENNumeric(char c) {
            final Integer mapped = enNumeric.get(c);
            if (mapped != null) {
                return mapped.intValue();
            }
            return -1;
        }
    
        /**
         * Recognises the item name and the price in a piece of spoken text.
         *
         * <p>If the text does not end with a currency unit, 元 is appended and used as the
         * anchor. Otherwise the anchor is the first unit, in the order 元, 块, 毛, 角, 分, whose
         * last occurrence is directly preceded by a numeral. This keeps a unit character that
         * is merely part of the item name (毛血旺5分, 元宵5角) from being taken as the anchor.
         * When no unit qualifies, the first unit contained in the text is used.
         *
         * @param voiceContent the recognised text
         * @return a map with the keys {@code "voiceContent"}, {@code "type"}, {@code "name"}
         *         and {@code "price"}
         * @throws ServiceException if the text is empty or holds no recognisable name and price
         */
        public static Map<String,String> priceRecognition(String voiceContent) throws ServiceException {
            // 1. Reject empty input.
            Map<String,String> result = new HashMap<>();
            result.put("voiceContent",voiceContent);
            if(StringUtils.isEmpty(voiceContent)){
                throw new InvalidParameterException("NOT_HAS_PRICE",NOT_HAS_PRICE_CONTENT);
            }

            // 2. Without a trailing unit the amount is taken to be in yuan.
            char lastChar = voiceContent.charAt(voiceContent.length() - 1);
            if (!PRICE_UNIT_LIST.contains(lastChar)) {
                commonRecognition(voiceContent+"元",BASE_UNIT_YUAN,result);
                return result;
            }

            // 3. Pick the anchor unit.
            String[] unitsByPriority = {
                    BASE_UNIT_YUAN, BASE_UNIT_KUAI, BASE_UNIT_MAO, BASE_UNIT_JIAO, BASE_UNIT_FEN };
            String baseUnit = null;
            for (String unit : unitsByPriority) {
                int unitIndex = voiceContent.lastIndexOf(unit);
                if (unitIndex > 0) {
                    char beforeUnit = voiceContent.charAt(unitIndex - 1);
                    if (Character.isDigit(beforeUnit) || cnNumeric.containsKey(beforeUnit)) {
                        baseUnit = unit;
                        break;
                    }
                }
            }
            if (baseUnit == null) {
                // No unit follows a numeral: fall back to the plain priority order.
                for (String unit : unitsByPriority) {
                    if (voiceContent.contains(unit)) {
                        baseUnit = unit;
                        break;
                    }
                }
            }
            if (baseUnit != null) {
                commonRecognition(voiceContent, baseUnit, result);
            }
            return result;
        }


    /**
     * Demo entry point: runs the recogniser over a fixed set of sample phrases.
     * A failure on one sample is printed and does not stop the remaining ones.
     */
    public static void main (String[] args) throws ServiceException
    {
        String[] samples = {
            "毛血旺28", // alternative sample: "毛豆炸酱煲仔饭14"
            "醋0.5元",
            "西红柿2金一千二百块二分",
            "西红柿2金12.4元",
            "西红柿2金2222212.42222块",
            "西红柿2金一百元56毛柒捌分",
            "红烧肉一百一百五十五十元五十毛柒捌分",
            "手抓饼一二千三四百五六十七八元一二毛三四分",
            "一千二百三十四元五毛六分"
        };
        for (int i = 0; i < samples.length; i++) {
            try {
                priceRecognition(samples[i]);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    }
  • 相关阅读:
    批量删除.svn文件夹、.svn文件
    Windows 7下Git SSH 创建Key的步骤
    Git:本地项目与远程仓库的git/clone
    git解决二进制文件冲突
    git设置mergetool可视化工具
    redhat7.2配置yum源
    project2016安装与破解
    strace 使用案例
    运维老鸟教你安装centos6.5如何选择安装包
    CSS限制
  • 原文地址:https://www.cnblogs.com/luoluoshidafu/p/8603674.html
Copyright © 2020-2023  润新知