• MR-join连接


    package com.bw.mr;
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Map-side join of order records against a user lookup file.
     *
     * <p>The user file is read completely into an in-memory map by every map task
     * (in {@code setup}), and each order record is then joined on its user id inside
     * {@code map}. The job runs with zero reducers.
     *
     * <p>Expected line formats (single-space separated):
     * <ul>
     *   <li>user file: {@code uid f1 f2 f3} (at least four fields)</li>
     *   <li>order input: {@code orderId uid money} (at least three fields)</li>
     * </ul>
     */
    public class MapJoin {
        /** Configuration key holding the location of the user lookup file. */
        private static final String USER_FILE_KEY = "mapjoin.user.file";
        /** Location used when {@link #USER_FILE_KEY} is not set. */
        private static final String DEFAULT_USER_FILE = "hdfs://linux04:9000/user.txt";
        /** Counter group for records this job skips. */
        private static final String COUNTER_GROUP = "MapJoin";

        /**
         * Mapper that joins each order line with the user record sharing its uid and
         * emits {@code "orderId uid <user fields>"} as the key with a null value.
         */
        public static class JMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
            // uid -> remaining user fields joined by single spaces
            private final Map<String, String> map = new HashMap<String, String>();
            // Reused for every output record to avoid one allocation per input line.
            private final Text outKey = new Text();

            /**
             * Loads the user lookup file into memory before any call to {@code map}.
             *
             * @throws IOException if the file cannot be read or contains a non-blank
             *         line with fewer than four fields
             */
            @Override
            protected void setup(Context context) throws IOException, InterruptedException {
                Configuration conf = context.getConfiguration();
                Path userFile = new Path(conf.get(USER_FILE_KEY, DEFAULT_USER_FILE));
                // Resolve the file system from the path itself so a fully qualified URI
                // works even when it differs from the configured default file system.
                FileSystem fs = userFile.getFileSystem(conf);
                // try-with-resources closes both stream and reader even if parsing fails.
                try (FSDataInputStream in = fs.open(userFile);
                        BufferedReader reader = new BufferedReader(
                                new InputStreamReader(in, StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (line.trim().isEmpty()) {
                            continue; // tolerate blank lines, e.g. a trailing newline
                        }
                        String[] fields = line.split(" ");
                        if (fields.length < 4) {
                            throw new IOException(
                                    "Malformed user record in " + userFile + ": '" + line + "'");
                        }
                        map.put(fields[0], fields[1] + " " + fields[2] + " " + fields[3]);
                    }
                }
            }

            /**
             * Joins one order line with its user record.
             *
             * <p>Blank lines are skipped. Orders whose uid has no entry in the user file
             * are dropped (inner-join semantics) and counted under the
             * {@code MapJoin/UNMATCHED_ORDERS} counter instead of being written with a
             * literal {@code "null"} in place of the user fields.
             *
             * @throws IOException if a non-blank line has fewer than three fields
             */
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String line = value.toString();
                if (line.trim().isEmpty()) {
                    return;
                }
                String[] strs = line.split(" ");
                // The third field (money) is not emitted, but its presence is still
                // required so that truncated order records are not silently accepted.
                if (strs.length < 3) {
                    throw new IOException("Malformed order record: '" + line + "'");
                }
                String orderId = strs[0];
                String uid = strs[1];
                String userInfo = map.get(uid);
                if (userInfo == null) {
                    context.getCounter(COUNTER_GROUP, "UNMATCHED_ORDERS").increment(1);
                    return;
                }
                outKey.set(orderId + " " + uid + " " + userInfo);
                context.write(outKey, NullWritable.get());
            }
        }

        /**
         * Configures and runs the map-only join job, exiting with status 0 on success,
         * 1 if the job fails, and 2 on bad usage.
         *
         * @param args {@code args[0]} input path, {@code args[1]} output path, and an
         *        optional {@code args[2]} overriding the user lookup file location
         */
        public static void main(String[] args) throws Exception {
            if (args.length < 2) {
                System.err.println("Usage: MapJoin <input path> <output path> [user file path]");
                System.exit(2);
            }
            Configuration conf = new Configuration();
            if (args.length > 2) {
                // Must be set before Job.getInstance, which takes a copy of conf.
                conf.set(USER_FILE_KEY, args[2]);
            }
            Job job = Job.getInstance(conf);
            job.setJarByClass(MapJoin.class);
            job.setMapperClass(JMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class);
            job.setNumReduceTasks(0);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // Propagate the job outcome to the shell instead of always exiting 0.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
  • 相关阅读:
    常用FPGA功能块记录
    鸿蒙相关
    微波相关
    Python库大全
    C#环境实现代码的自动生成编译
    STM32相关
    硬件相关
    C# 获取枚举中文注释
    C# 获取自定义特性值
    Asp.Net Core 中 Host 与 WebHost的区别
  • 原文地址:https://www.cnblogs.com/JBLi/p/10765134.html
Copyright © 2020-2023  润新知