• map map


    下面的无法运行。
    /**
     * Computes a Laplace-smoothed probability for the word carried by one input record
     * and writes it through {@code mos}.
     *
     * <p>Each input value is expected to be a tab-separated line of the form
     * {@code <class>\t<word>\t<count>}; lines that do not split into exactly three
     * fields produce no output.
     *
     * <p>Fixes over the previous revision:
     * <ul>
     *   <li>the per-class word list is traversed with two independent for-each loops;
     *       previously the second pass reused an already exhausted {@code Iterator},
     *       so no probability was ever computed or written;</li>
     *   <li>the division is done in floating point (it used to be written with
     *       integer operands);</li>
     *   <li>the smoothing denominator uses the {@code TOTALWORDS} value read from the
     *       job configuration, which was parsed but never used in the formula.</li>
     * </ul>
     *
     * @param key     input key supplied by the framework; not read by this method
     * @param value   one tab-separated {@code class / word / count} record
     * @param context task context, used here only to obtain the job configuration
     * @throws IOException           propagated from the output writer
     * @throws InterruptedException  propagated from the output writer
     * @throws NumberFormatException if {@code TOTALWORDS} or the count field is not an integer
     */
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        int tot = Integer.parseInt(conf.get("TOTALWORDS"));

        System.out.println("total === " + total);
        System.out.println("tot = " + tot);

        // Input looks like (tab separated):
        //   ALB  weekend  1
        //   ALB  weeks    3
        // class name -> list of "word<TAB>count" strings
        Map<String, List<String>> baseMap = new HashMap<String, List<String>>();

        String[] temp = value.toString().split("\t");
        if (temp.length == 3) {
            // temp[0] is the class (folder) name.
            String wordAndNumber = temp[1] + "\t" + temp[2];
            List<String> wordsOfClass = baseMap.get(temp[0]);
            if (wordsOfClass == null) {
                wordsOfClass = new ArrayList<String>();
                baseMap.put(temp[0], wordsOfClass);
            }
            wordsOfClass.add(wordAndNumber);
        }

        // NOTE(review): baseMap is rebuilt on every call from a single record, so the
        // "class total" below only covers that record. Aggregating over a whole class
        // would need state that outlives one map() call.
        for (Map.Entry<String, List<String>> classEntry : baseMap.entrySet()) {
            List<String> wordsOfClass = classEntry.getValue();

            // First pass: total number of word occurrences in this class.
            int allWordsInClass = 0;
            for (String wordAndNumber : wordsOfClass) {
                allWordsInClass += Integer.parseInt(wordAndNumber.split("\t")[1]);
            }
            System.out.println(allWordsInClass);

            // Second pass: smoothed probability of every word in this class.
            for (String wordAndNumber : wordsOfClass) {
                String[] wordAndCount = wordAndNumber.split("\t");
                // 1.0 forces floating-point division.
                double p = (Integer.parseInt(wordAndCount[1]) + 1.0) / (allWordsInClass + tot);

                // className/number are kept up to date for the alternative
                // context.write(className, number) output path.
                String classAndWord = classEntry.getKey() + "\t" + wordAndCount[0];
                className.set(classAndWord);
                number.set(p);
                LOG.info("------>p = " + p);

                // NOTE(review): this passes a raw String key and a boxed double to the
                // named output called after the class - confirm that the named output
                // is registered and accepts these types.
                mos.write(classEntry.getKey(), wordAndCount[0], p);
            }
        }
    }
    
    
    
    /**
     * Computes a Laplace-smoothed probability for the word carried by one input record
     * and emits it as {@code (class<TAB>word, probability)}.
     *
     * <p>Each input value is expected to be a tab-separated line of the form
     * {@code <class>\t<word>\t<count>}; lines that do not split into exactly three
     * fields produce no output. The probability is
     * {@code (count + 1) / (classTotal + TOTALWORDS)}, where {@code TOTALWORDS} is read
     * from the job configuration.
     *
     * <p>Compared with the previous revision, the write-only {@code priorMap}/{@code pMap}
     * bookkeeping (filled on every iteration but never read) has been removed, the
     * duplicated count parsing has been merged, and {@code @Override} has been added.
     * The emitted records are unchanged.
     *
     * @param key     input key supplied by the framework; not read by this method
     * @param value   one tab-separated {@code class / word / count} record
     * @param context task context; provides the configuration and receives the output
     * @throws IOException           propagated from {@code context.write}
     * @throws InterruptedException  propagated from {@code context.write}
     * @throws NumberFormatException if {@code TOTALWORDS} or the count field is not an integer
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        int tot = Integer.parseInt(conf.get("TOTALWORDS"));

        System.out.println("total === " + total);
        System.out.println("tot = " + tot);

        // Input looks like (tab separated):
        //   ALB  weekend  1
        //   ALB  weeks    3
        // class name -> (word -> count)
        Map<String, Map<String, Integer>> baseMap = new HashMap<String, Map<String, Integer>>();

        String[] temp = value.toString().split("\t");
        if (temp.length == 3) {
            // temp[0] is the class (folder) name.
            Map<String, Integer> countsOfClass = baseMap.get(temp[0]);
            if (countsOfClass == null) {
                countsOfClass = new HashMap<String, Integer>();
                baseMap.put(temp[0], countsOfClass);
            }
            countsOfClass.put(temp[1], Integer.parseInt(temp[2]));
        }

        // NOTE(review): baseMap is rebuilt on every call from a single record, so the
        // "class total" printed below is just that record's count and not the total of
        // the whole class. Aggregating per class would need state that outlives one
        // map() call.
        for (Map.Entry<String, Map<String, Integer>> classEntry : baseMap.entrySet()) {
            Map<String, Integer> countsOfClass = classEntry.getValue();

            // First pass: total number of word occurrences in this class.
            int allWordsInClass = 0;
            for (Integer count : countsOfClass.values()) {
                allWordsInClass += count;
            }
            System.out.println(allWordsInClass);

            // Second pass: smoothed probability of every word in this class.
            for (Map.Entry<String, Integer> wordEntry : countsOfClass.entrySet()) {
                // 1.0 forces floating-point division.
                double p = (wordEntry.getValue() + 1.0) / (allWordsInClass + tot);

                className.set(classEntry.getKey() + "\t" + wordEntry.getKey());
                number.set(p);
                LOG.info("------>p = " + p);

                context.write(className, number);
            }
        }
    }
  • 相关阅读:
    SQL群集多实例卸载、安装
    Java 数组的 12 个方法
    Java 容器
    RDA 升级
    DVB条件接收系统多密技术的设计与实现
    perl数组的长度与元素个数
    TS流解析 四
    Python中操作myslq的方法
    DVB-subtitle解析流程浅
    JAVA回调机制(CallBack)详解
  • 原文地址:https://www.cnblogs.com/luolizhi/p/4941801.html
Copyright © 2020-2023  润新知