• MapReduce辅助排序


    需求:订单数据
    
        求出每个订单中最贵的商品?
    
        订单id正序,成交金额倒序。
        结果文件三个,每个结果文件只要一条数据。

    1.Mapper类

    package com.css.order.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
    
        /**
         * Parses one tab-separated order line and emits an OrderBean composite
         * key (order id + price). The value carries no information, so
         * NullWritable is used.
         */
        @Override
        protected void map(LongWritable key, Text value,Context context)
                        throws IOException, InterruptedException {
            // Split the raw line on the tab delimiter.
            String[] columns = value.toString().split("	");
            // Column 0 is the order id, column 2 is the sale price.
            int orderId = Integer.parseInt(columns[0]);
            double amount = Double.parseDouble(columns[2]);
            // Emit the composite key; sorting/grouping happens on the bean itself.
            context.write(new OrderBean(orderId, amount), NullWritable.get());
        }
    }

    2.Reducer类

    package com.css.order.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{
        /**
         * Emits one record per group. Because keys arrive sorted by order id
         * ascending and price descending, and the grouping comparator groups
         * by order id only, the group's key holds the highest price of that
         * order — writing it once yields the most expensive item per order.
         */
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values,
                Context context)throws IOException, InterruptedException {
            // The first (and only observed) key of the group is the maximum.
            context.write(key, NullWritable.get());
        }
    }

    3.封装类

    package com.css.order.mr;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    import org.apache.hadoop.io.WritableComparable;
    
    public class OrderBean implements WritableComparable<OrderBean>{
    
        // Composite key fields.
        private int order_id; // order id
        private double price; // sale price
        
        // Hadoop requires a no-arg constructor for deserialization.
        public OrderBean(){        
        }
        
        public OrderBean(int order_id, double price) {
            super();
            this.order_id = order_id;
            this.price = price;
        }
        
        public int getOrder_id() {
            return order_id;
        }
    
        public void setOrder_id(int order_id) {
            this.order_id = order_id;
        }
    
        public double getPrice() {
            return price;
        }
    
        public void setPrice(double price) {
            this.price = price;
        }
        
        // Serialization: field order must match readFields().
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(order_id);
            out.writeDouble(price);
        }
    
        // Deserialization: same field order as write().
        @Override
        public void readFields(DataInput in) throws IOException {
            order_id = in.readInt();
            price = in.readDouble();
        }
    
        @Override
        public String toString() {
            return order_id + "	" + price;
        }
    
        /**
         * Sort order: order id ascending, then price descending.
         *
         * BUG FIX: the original returned 1 when ids and prices were both
         * equal, so compareTo never returned 0 for equal objects — violating
         * the Comparable contract and risking unstable merge-sort behavior.
         * Integer.compare / Double.compare restore the contract (and
         * Double.compare also handles NaN consistently).
         */
        @Override
        public int compareTo(OrderBean o) {
            int byId = Integer.compare(order_id, o.order_id);
            if (byId != 0) {
                return byId;
            }
            // Same order id: higher price sorts first (descending).
            return -Double.compare(price, o.price);
        }
    
        // equals/hashCode consistent with compareTo (equal id AND price).
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof OrderBean)) {
                return false;
            }
            OrderBean other = (OrderBean) obj;
            return order_id == other.order_id
                    && Double.compare(price, other.price) == 0;
        }
    
        @Override
        public int hashCode() {
            return 31 * order_id + Double.hashCode(price);
        }
        
    }

    4.自定义分区类

    package com.css.order.mr;
    
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Partitioner;
    
    public class OrderPartitioner extends Partitioner<OrderBean, NullWritable>{
    
        /**
         * Routes records by order id so every item of one order reaches the
         * same reduce task (a prerequisite for the grouping comparator).
         */
        @Override
        public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
            // Mask the sign bit so a negative id can never produce a negative partition.
            int nonNegativeId = key.getOrder_id() & Integer.MAX_VALUE;
            return nonNegativeId % numPartitions;
        }
    }

    5.自定义排序分组类

    package com.css.order.mr;
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    public class OrderGroupingComparator extends WritableComparator{
    
        // Mandatory: the superclass must know the key class and be allowed to
        // instantiate it (second argument true) for deserialized comparison.
        protected OrderGroupingComparator() {
            super(OrderBean.class, true);
        }
    
        /**
         * Groups keys by order id only, ignoring price — so all records of
         * one order, regardless of price, enter the same reduce() call.
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            OrderBean left = (OrderBean) a;
            OrderBean right = (OrderBean) b;
            return Integer.compare(left.getOrder_id(), right.getOrder_id());
        }
    }

    6.Driver类

    package com.css.order.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class OrderDriver {
        /**
         * Configures and submits the secondary-sort job.
         *
         * Usage: OrderDriver [inputPath] [outputPath]
         * Falls back to the original hard-coded paths when args are omitted,
         * so existing invocations keep working.
         *
         * BUG FIX: the original printed the exit code with System.out.println
         * instead of exiting with it, so the JVM always returned 0 even when
         * the job failed. System.exit propagates success/failure to callers
         * (shell scripts, schedulers).
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            // 1. Job configuration.
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            
            // 2. Locate the jar containing this driver.
            job.setJarByClass(OrderDriver.class);
            
            // 3. Mapper and reducer.
            job.setMapperClass(OrderMapper.class);
            job.setReducerClass(OrderReducer.class);
            
            // 4. Map output types.
            job.setMapOutputKeyClass(OrderBean.class);
            job.setMapOutputValueClass(NullWritable.class);
            
            // 5. Final output types.
            job.setOutputKeyClass(OrderBean.class);
            job.setOutputValueClass(NullWritable.class);
            
            // 6. Group reducer input by order id only.
            job.setGroupingComparatorClass(OrderGroupingComparator.class);
            
            // 7. Partition by order id so each order stays on one reducer.
            job.setPartitionerClass(OrderPartitioner.class);
            
            // 8. Three reduce tasks -> three output files.
            job.setNumReduceTasks(3);
            
            // 9. Input/output paths: CLI args if given, original defaults otherwise.
            String inputPath = args.length > 0 ? args[0] : "c://in1026";
            String outputPath = args.length > 1 ? args[1] : "c://out1026";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            
            // 10. Submit and propagate the job status as the process exit code.
            boolean rs = job.waitForCompletion(true);
            System.exit(rs ? 0 : 1);
        }
    }

    7.mr输入文件order.txt

    1001    Tmall_01    998
    1001    Tmall_06    88.8
    1001    Tmall_03    522.8
    1002    Tmall_03    522.8
    1002    Tmall_04    132.4
    1002    Tmall_05    372.4
    1003    Tmall_01    998
    1003    Tmall_02    8.5
    1003    Tmall_04    132.4

    8.输出文件

    (1)part-r-00000
    1002    522.8
    (2)part-r-00001
    1003    998.0
    (3)part-r-00002
    1001    998.0
  • 相关阅读:
    vue 组件通信(全)
    clickoutside 代码实现
    reset css 样式重置
    vue computed 无法deep的问题
    sessionStorage的总结
    Windows系统maven安装配置
    Windows系统JDK安装配置
    开篇
    JIT即时编译器
    CLR基础
  • 原文地址:https://www.cnblogs.com/areyouready/p/9904055.html
Copyright © 2020-2023  润新知