下面的代码无法运行。
@Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); int tot = Integer.parseInt(conf.get("TOTALWORDS")); System.out.println("total === " + total); System.out.println("tot = " + tot); // 输入的格式如下: // ALB weekend 1 // ALB weeks 3 Map<String, List<String>> baseMap = new HashMap<String, List<String>>(); // 保存基础数据 // Map<String, List<Double>> priorMap = new HashMap<String, List<Double>>(); // 保存每个单词出现的概率 String[] temp = value.toString().split(" "); // 先将数据存到baseMap中 if (temp.length == 3) { // 文件夹名类别名temp[0] String wordAndNumber = null; wordAndNumber = temp[1] + " " + temp[2]; if (baseMap.containsKey(temp[0])) { baseMap.get(temp[0]).add(wordAndNumber); } else { List<String> oneList = new ArrayList<String>(); oneList.add(wordAndNumber); baseMap.put(temp[0], oneList); } } // 读取数据完毕,全部保存在baseMap中 // 两层循环计算出每个类别中每个单词的概率 Iterator<Map.Entry<String, List<String>>> iterators = baseMap.entrySet().iterator(); while (iterators.hasNext()) {// 遍历类别 Map.Entry<String, List<String>> iterator = iterators.next(); int allWordsInClass = 0; // list遍历 Iterator<String> its = iterator.getValue().iterator(); // 得到每个类别的单词总数 while (its.hasNext()) { String[] temp1 = its.next().split(" "); allWordsInClass += Integer.parseInt(temp1[1]); } System.out.println(allWordsInClass);// 这个数据没有计算成功???? // // Map<String, List<Double>> pMap = new HashMap<String, List<Double>>(); // List<Double> pList = new ArrayList<Double>(); // 遍历每个单词的词频计算器概率 while (its.hasNext()) { String[] temp1 = its.next().split(" "); double p = (Integer.parseInt(temp1[1]) + 1) / (allWordsInClass + total); String classAndWord = iterator.getKey() + " " + temp1[0]; className.set(classAndWord); number.set(p); LOG.info("------>p = " + p); // context.write(className, number); mos.write(iterator.getKey(), temp1[0], p); } } }
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); int tot = Integer.parseInt(conf.get("TOTALWORDS")); System.out.println("total === " + total); System.out.println("tot = " + tot); // 输入的格式如下: // ALB weekend 1 // ALB weeks 3 Map<String, Map<String, Integer>> baseMap = new HashMap<String, Map<String, Integer>>(); // 保存基础数据 Map<String, Map<String, Double>> priorMap = new HashMap<String, Map<String, Double>>(); // 保存每个单词出现的概率 String[] temp = value.toString().split(" "); // 先将数据存到baseMap中 if (temp.length == 3) { // 文件夹名类别名 if (baseMap.containsKey(temp[0])) { baseMap.get(temp[0]).put(temp[1], Integer.parseInt(temp[2])); } else { Map<String, Integer> oneMap = new HashMap<String, Integer>(); oneMap.put(temp[1], Integer.parseInt(temp[2])); baseMap.put(temp[0], oneMap); } } // 读取数据完毕,全部保存在baseMap中 // 两层循环计算出每个类别中每个单词的概率 Iterator<Map.Entry<String, Map<String, Integer>>> iterators = baseMap.entrySet().iterator(); while (iterators.hasNext()) {// 遍历类别 Map.Entry<String, Map<String, Integer>> iterator = iterators.next(); int allWordsInClass = 0; for (Map.Entry<String, Integer> entry : iterator.getValue().entrySet()) {// 遍历类别中的单词,先求出类别中的单词总数 allWordsInClass += entry.getValue(); } System.out.println(allWordsInClass);//这个数据没有计算成功 // Map<String, Double> pMap = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : iterator.getValue().entrySet()) {// 在遍历每个单词的个数计算单词出现的概率 double p = (entry.getValue()+ 1.0) / (allWordsInClass + tot);// pMap.put(entry.getKey(), p); priorMap.put(iterator.getKey(), pMap); className.set(iterator.getKey() + " " + entry.getKey()); number.set(p); LOG.info("------>p = " + p); context.write(className, number); // mos.write(iterator.getKey(), entry.getKey(), p); } } /* * value.set(temp[1]); number.set(Integer.parseInt(temp[2])); * mos.write(value, number, dirName); */ }