• 【Hadoop学习之九】MapReduce案例分析一-天气


    环境
      虚拟机:VMware 10
      Linux版本:CentOS-6.5-x86_64
      客户端:Xshell4
      FTP:Xftp4
      jdk8
      hadoop-3.1.1

    找出每个月气温最高的2天

    1949-10-01 14:21:02        34c
    1949-10-01 19:21:02        38c
    1949-10-02 14:01:02        36c
    1950-01-01 11:21:02        32c
    1950-10-01 12:21:02        37c
    1951-12-01 12:21:02        23c
    1950-10-02 12:21:02        41c
    1950-10-03 12:21:02        27c
    1951-07-01 12:21:02        45c
    1951-07-02 12:21:02        46c
    1951-07-03 12:21:03        47c
    package test.mr.tq;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    
    /**
     * @author Administrator
     * 客户端
     */
    public class MyTQ {
        
    
        /**
         * 找出每个月气温最高的2天
    
         * @param args
         */
        public static void main(String[] args) {
            //加载配置文件
            Configuration conf = new Configuration();
            
            try {
                //创建客户端
                Job job = Job.getInstance(conf,"tian qi");
                job.setJarByClass(MyTQ.class);
                
                //Map
                job.setMapperClass(TQMapper.class);
                job.setOutputKeyClass(TQ.class);
                job.setOutputValueClass(IntWritable.class);
                //分区类  处理大数据量均衡并发处理
                job.setPartitionerClass(TqPartitioner.class);
                //用于buffer字节数组内的key排序的比较类  温度最高的2天  需要排序
                job.setSortComparatorClass(TqSortComparator.class);
                
                //Reduce
                job.setReducerClass(TqReducer.class);
                job.setNumReduceTasks(2);
                //用于分组的比较类  年月相同的被视为一组
                job.setGroupingComparatorClass(TqGroupingComparator.class);
                
                //输入  输出
                Path input = new Path("/root/input");
                FileInputFormat.addInputPath(job, input);
                Path output = new Path("/root/output");
                if (output.getFileSystem(conf).exists(output))
                {
                    output.getFileSystem(conf).delete(output, true);
                }
                FileOutputFormat.setOutputPath(job, output);
                
                //提交
                System.exit(job.waitForCompletion(true) ? 0 : 1);
                
            } catch (Exception e) {
                e.printStackTrace();
            }
    
        }
    
    }
    package test.mr.tq;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    import org.apache.hadoop.io.WritableComparable;
    
    public class TQ implements WritableComparable<TQ>{
        
        private int year;
        private int month;
        private int day;
        private int wd;
        public int getYear() {
            return year;
        }
        public void setYear(int year) {
            this.year = year;
        }
        public int getMonth() {
            return month;
        }
        public void setMonth(int month) {
            this.month = month;
        }
        public int getDay() {
            return day;
        }
        public void setDay(int day) {
            this.day = day;
        }
        public int getWd() {
            return wd;
        }
        public void setWd(int wd) {
            this.wd = wd;
        }
        
        /**
         * 反序列化进来
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            this.year = in.readInt();
            this.month = in.readInt();
            this.day = in.readInt();
            this.wd = in.readInt();
        }
        
        /**
         * 序列化出去
         */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(year);
            out.writeInt(month);
            out.writeInt(day);
            out.writeInt(wd);
        }
        
        @Override
        public int compareTo(TQ that) {
            //时间正序
            int y = Integer.compare(this.year, that.getYear());
            if (y == 0)
            {
                int m = Integer.compare(this.month, that.getMonth());
                if (m == 0)
                {
                    return Integer.compare(this.day, that.getDay());
                }
                return m;
            }
            return y;
        }
        
        
    
    }
    package test.mr.tq;
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    public class TqGroupingComparator extends WritableComparator {
        
        public TqGroupingComparator()
        {
            super(TQ.class,true);
        }
    
        /**
         * 面向reduce  按照年月分组
         * 年月不相同  就不属于同一组  
         * 返回0表示同一组
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            TQ t1 = (TQ)a;
            TQ t2 = (TQ)b;
            int y = Integer.compare(t1.getYear(), t2.getYear());
            if (y==0)
            {
                return Integer.compare(t1.getMonth(), t2.getMonth());
            }
            return y;
        }
    }
    package test.mr.tq;
    
    import java.io.IOException;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    
    import org.apache.commons.lang.StringUtils;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class TQMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {
        //k:v映射的设计
        //    K                  V
    //    1949-10-01 14:21:02        34c
    //    1949-10-01 19:21:02        38c
    //    1949-10-02 14:01:02        36c
    //    1950-01-01 11:21:02        32c
    //    1950-10-01 12:21:02        37c
    //    1951-12-01 12:21:02        23c
    //    1950-10-02 12:21:02        41c
    //    1950-10-03 12:21:02        27c
    //    1951-07-01 12:21:02        45c
    //    1951-07-02 12:21:02        46c
    //    1951-07-03 12:21:03        47c
        
        TQ tq = new TQ();
        IntWritable vwd = new IntWritable();
        
        @Override
        protected void map(LongWritable key, Text value, 
                Context context) throws IOException, InterruptedException 
        {
            try 
            {
                //1951-07-03 12:21:03        47c
                String[] strs = StringUtils.split(value.toString(),"	");
                SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
                Date date = sdf.parse(strs[0]);
                
                Calendar cal = Calendar.getInstance();
                cal.setTime(date);
                
                //key
                tq.setYear(cal.get(Calendar.YEAR));
                tq.setMonth(cal.get(Calendar.MONTH)+1);
                tq.setDay(cal.get(Calendar.DAY_OF_MONTH));
                int wd = Integer.parseInt(strs[1].substring(0, strs[1].length()-1));
                tq.setWd(wd);
                
                //value
                vwd.set(wd);
                
                //输出
                context.write(tq, vwd);
            } 
            catch (ParseException e) 
            {
                e.printStackTrace();
            }
            
        }
    
    }
    package test.mr.tq;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Partitioner;
    
    /**
     * @author wjy
     * K.V==>K.V.P
     * 分区规则设计  尽量使数据分区均衡  避免倾斜
     */
    public class TqPartitioner extends Partitioner<TQ, IntWritable> {
    
        @Override
        public int getPartition(TQ key, IntWritable value, int numPartitions) {
            
            return key.getYear() % numPartitions;
        }
    
    }
    package test.mr.tq;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class TqReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {
        
        Text rkey = new Text();
        IntWritable rval  = new IntWritable();
        
        @Override
        protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException 
        {
            //相同的key为一组
            // 时间正序             温度倒序
            // 1970 01 01 40
            // 1970 01 02 38
            //迭代values key会随着变化
            int flg = 0;
            int day = 0;
            for (IntWritable wd : values) {
                if (flg == 0)
                {
                    day = key.getDay();
                    rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
                    rval.set(key.getWd());//wd.get()
                    context.write(rkey, rval);
                    flg ++;
                }
                
                if (flg != 0 && day != key.getDay())
                {
                    rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
                    rval.set(key.getWd());//wd.get()
                    context.write(rkey, rval);
                    break;
                }
            }
        }
    }
    package test.mr.tq;
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    public class TqSortComparator extends WritableComparator {
        
        //对字节数据中map进行排序  所以需要先将Key反序列化为对象  然后再进行比较
        public TqSortComparator()
        {
            super(TQ.class,true);
        }
        
        /**
         * 按照时间正序  温度倒序对字节数组排序
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            TQ t1 = (TQ)a;
            TQ t2 = (TQ)b;
            int y = Integer.compare(t1.getYear(), t2.getYear());
            if (y==0)
            {
                int m = Integer.compare(t1.getMonth(), t2.getMonth());
                if (m == 0)
                {
                    //前面加一个负号  就可以实现倒序的效果
                    return - Integer.compare(t1.getWd(), t2.getWd());
                }
                return m;
            }
            return y;
        }
    
    }
  • 相关阅读:
    CF163E e-Government
    P2336 [SCOI2012]喵星球上的点名
    数据结构
    数字逻辑
    建筑制图与识图
    建筑施工
    电力系统分析
    现代物流基础
    电子商务网站设计与管理
    数字电子技术基础
  • 原文地址:https://www.cnblogs.com/cac2020/p/10308075.html
Copyright © 2020-2023  润新知