MapReduce Experiment: Deduplication
Deduplicate records by product id to determine which distinct products appear among users' favorited items.
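For example, assuming input lines of the form "user_id item_id" separated by a single space (the format the mapper below relies on when it calls line.split(" ")), with made-up ids:

    Input:                 Output:
    10001 8231             4723
    10002 4723             8231
    10003 8231

Each product id appears exactly once in the output, in the shuffle's default Text key order.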
Problem encountered:
The program died when the mapper reached context.write(), and job.waitForCompletion(true) returned false.
Cause and fix:
Hadoop on the Linux VM had been started without sudo; after restarting it, the problem went away.
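A related pitfall that also makes job.waitForCompletion(true) return false (not the cause here, but worth ruling out): MapReduce refuses to start if the output directory already exists. A minimal sketch, assuming it is placed in main() below where conf and outputpath are defined, that clears a leftover output directory before resubmitting (requires importing org.apache.hadoop.fs.FileSystem and java.net.URI):

    // Sketch: remove a leftover output directory before resubmitting the job.
    FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), conf);
    if (fs.exists(outputpath)) {
        fs.delete(outputpath, true); // true = delete recursively
    }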
/**
 * MapReduce experiment: deduplication.
 * Deduplicate by product id to determine which distinct products
 * appear among users' favorited items.
 */
package MapReducetests;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapReducetest1 {

    static String INPUT_PATH = "hdfs://192.168.57.128:9000/testhdfs1026/run/input/mapreducetest1";
    static String OUTPUT_PATH = "hdfs://192.168.57.128:9000/testhdfs1026/run/output/test1";

    /*
     * Mapper.
     * NullWritable is a special Writable whose serialized length is zero
     * (its methods are empty implementations). Declare the key or value
     * as NullWritable when you do not need it.
     */
    static class MyMapper extends Mapper<Object, Text, Text, NullWritable> {
        // map copies the product id from the input value into the output key
        // and emits it directly
        private Text newKey = new Text();

        @Override
        protected void map(Object key, Text value, Context context) // Context carries the input key and value
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] arr = line.split(" ");  // input format: "user_id item_id"
            if (arr.length < 2) {
                return;                      // skip malformed lines instead of crashing on arr[1]
            }
            newKey.set(arr[1]);              // the product id becomes the output key
            // Emitting only keys is what implements the deduplication:
            // the shuffle groups identical keys into a single reduce call.
            context.write(newKey, NullWritable.get());
        }
    }

    // Reducer
    static class MyReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) // after the shuffle, the NullWritable values arrive as an Iterable
                throws IOException, InterruptedException {
            // Duplicate keys were already merged during the shuffle,
            // so each key is emitted exactly once.
            context.write(key, NullWritable.get());
            System.out.println("Reduce.key:" + key);
        }
    }

    public static void main(String[] args) throws Exception {
        Path inputpath = new Path(INPUT_PATH);
        Path outputpath = new Path(OUTPUT_PATH);
        Configuration conf = new Configuration();
        System.out.println("Start");

        Job job = Job.getInstance(conf, "MapReducetest1"); // new Job(conf, name) is deprecated
        job.setJarByClass(MapReducetest1.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, inputpath);
        FileOutputFormat.setOutputPath(job, outputpath);

        /*
         * job.waitForCompletion(true) submits the job and waits for it
         * to finish; true streams progress to the console as the job
         * runs, false just waits for the job to end.
         */
        boolean flag = job.waitForCompletion(true);
        System.out.println(flag);
        System.exit(flag ? 0 : 1);
    }
}
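Since the reducer only re-emits its key, it can also serve as a combiner, collapsing duplicate ids on the map side before the shuffle and cutting network traffic. An optional one-line addition to main() (not part of the original run) would be:

    // Optional: deduplicate map output locally before it is shuffled.
    job.setCombinerClass(MyReduce.class);

Note that the combiner then also runs inside map tasks, so the Reduce.key debug line would appear in map task logs as well.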