• Learning Log --- 8


     MapReduce Inverted Index
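
    The job builds an inverted index in three steps: the mapper tags every word with the file it came from (key "word&filename", value "1"), the combiner sums those counts per file and re-keys the record by the word alone, and the reducer concatenates the per-file counts into one posting list per word. As a hypothetical illustration (the file names and contents below are made up, not taken from the original post), two input files

        file1.txt: hadoop mapreduce hadoop
        file2.txt: hadoop hdfs

    would produce output roughly like

        hadoop      (file1.txt:2)(file2.txt:1)
        hdfs        (file2.txt:1)
        mapreduce   (file1.txt:1)

    where the word and its posting list are separated by a tab (the TextOutputFormat default) and the order of the "(filename:count)" entries within a line is not guaranteed.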

    Code:

    MyMapper.java

    package dpsy;

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    public class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Get the name of the file this input split belongs to.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String file = fileSplit.getPath().getName();
            // Emit one (word&filename, "1") pair for every word on the line.
            String thisLine = value.toString();
            String[] words = thisLine.split(" ");
            for (String word : words) {
                context.write(new Text(word + "&" + file), new Text("1"));
            }
        }

    }
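
    For each input line the mapper looks up the name of the file the split belongs to and emits one (word&filename, "1") pair per word. For the hypothetical file1.txt above, it would emit (hadoop&file1.txt, 1), (mapreduce&file1.txt, 1), and (hadoop&file1.txt, 1).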

    MyCombiner.java

    package dpsy;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class MyCombiner extends Reducer<Text, Text, Text, Text> {
    
        @Override
        protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context arg2)
                throws IOException, InterruptedException {
            // Sum the "1" values emitted for this (word&filename) key.
            int sum = 0;
            for (Text text : arg1) {
                sum += Integer.parseInt(text.toString());
            }
            // Re-key by the word alone and carry the per-file count
            // along as "(filename:count)".
            String[] s = arg0.toString().split("&");
            String outKey = s[0];
            String outValue = "(" + s[1] + ":" + sum + ")";
            arg2.write(new Text(outKey), new Text(outValue));
        }
        
    
    }
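
    Running on the map side, the combiner sums the "1" values for each (word&filename) key and rewrites the record as word → "(filename:count)", so the reducer only has to concatenate. Note that this design relies on the combiner actually running: Hadoop treats combiners as an optional optimization, so if one were skipped the reducer would receive raw "word&filename" keys and "1" values instead. With small inputs it normally does run, but a more robust version would do the per-file counting in the reducer itself.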

    MyReducer.java

    package dpsy;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class MyReducer extends Reducer<Text, Text, Text, Text> {
    
        @Override
        protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context arg2)
                throws IOException, InterruptedException {
            // Concatenate every "(filename:count)" value for this word
            // into a single posting list.
            StringBuilder outValue = new StringBuilder();
            for (Text arg : arg1) {
                outValue.append(arg.toString());
            }
            arg2.write(arg0, new Text(outValue.toString()));
        }
    
        
        
    }
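
    The reducer simply appends every "(filename:count)" value it receives for a word, producing a posting list such as "(file1.txt:2)(file2.txt:1)" (file names again hypothetical). Since the values arrive in no particular order, inserting a separator or sorting them would be a small readability improvement.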

    MyJob.java

    package dpsy;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    
    
    public class MyJob extends Configured implements Tool {

        public static void main(String[] args) throws Exception {
            // Needed only when launching from a Windows development machine
            // with the Hadoop/winutils binaries installed under this directory.
            System.setProperty("hadoop.home.dir", "E:\\hadoop");
            MyJob myJob = new MyJob();
            ToolRunner.run(myJob, args);
        }

        public int run(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // fs.default.name is the older name of fs.defaultFS; it still works
            // but is deprecated in current Hadoop releases.
            conf.set("fs.default.name", "hdfs://192.168.137.11:9000");
            Job job = Job.getInstance(conf);
            job.setJarByClass(MyJob.class);
            job.setMapperClass(MyMapper.class);
            job.setCombinerClass(MyCombiner.class);
            job.setReducerClass(MyReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path("/hadoop/dpsy"));
            // The output directory must not already exist when the job starts.
            FileOutputFormat.setOutputPath(job, new Path("/dpsyResult"));
            return job.waitForCompletion(true) ? 0 : 1;
        }

    }
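
    To submit the job to the cluster, the project is typically exported as a jar and launched with the hadoop command, for example (jar name hypothetical): hadoop jar dpsy.jar dpsy.MyJob. The hadoop.home.dir property is only needed when running the driver directly from a Windows IDE, and the /dpsyResult output directory has to be deleted between runs.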

     Results:

  • Original article: https://www.cnblogs.com/yifengyifeng/p/9323463.html