• MapReduce天气查询实例


    MapReduce天气查询实例

    天气统计案例

    2000-01-01	16	29
    2000-01-02	14	40
    2000-01-03	23	35
    2000-01-04	18	25
    2000-01-05	14	33
    2000-01-06	14	-4
    ......
    2000-01-18	23	26
    2000-01-19	10	-5
    

    找出每个月中气温最高的两天

    提交作业类

    WeatherApp.class

    package icu.shaoyayu.hadoop.weather;
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import icu.shaoyayu.hadoop.weather.mapper.WeatherMapper;
    import icu.shaoyayu.hadoop.weather.reduce.WeatherReduce;
    import icu.shaoyayu.hadoop.weather.util.WeatherGroupingComparator;
    import icu.shaoyayu.hadoop.weather.util.WeatherPartitioner;
    import icu.shaoyayu.hadoop.weather.util.WeatherSortComparator;
    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @author 邵涯语
     * @date 2020/4/17 16:41
     * @Version :
     */
    /**
     * Driver that configures and submits the weather MapReduce job, which emits
     * the two hottest days of every month.
     *
     * <p>The whole job (mapper, partitioner, comparators, reducer, input/output
     * paths and reducer count) is configured first and the job is submitted last:
     * a {@link Job} can only be modified before it has been submitted.
     */
    public class WeatherApp {

        private static final Log LOG = LogFactory.getLog(WeatherApp.class.getName());

        /**
         * Builds the job, removes a stale output directory, runs the job and waits for it.
         *
         * @param args command-line arguments (unused)
         * @throws IOException            if the file system or the job submission fails
         * @throws ClassNotFoundException if a configured job class cannot be loaded
         * @throws InterruptedException   if the thread is interrupted while waiting for the job
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

            // Initialise the configuration, loading the default resources.
            Configuration configuration = new Configuration(true);

            // Create the job instance and tell Hadoop which jar contains the job classes.
            Job job = Job.getInstance(configuration);
            job.setJarByClass(WeatherApp.class);

            // The input format is left at its default. JobContextImpl resolves it with
            //   conf.getClass(INPUT_FORMAT_CLASS_ATTR, TextInputFormat.class);
            // so TextInputFormat is used unless job.setInputFormatClass(...) is called.

            //========================== Map phase ==============================

            // Custom mapper (the default would be the identity Mapper).
            job.setMapperClass(WeatherMapper.class);

            // Map output key; it must support serialization and deserialization.
            job.setMapOutputKeyClass(WeatherMapOutputKeyClass.class);

            // Map output value type.
            job.setMapOutputValueClass(IntWritable.class);

            // Custom partitioner.
            job.setPartitionerClass(WeatherPartitioner.class);

            // Custom sort comparator.
            job.setSortComparatorClass(WeatherSortComparator.class);

            // A combiner could be configured here:
            // job.setCombinerClass(WeatherCombiner.class);

            //========================== Reduce phase ==============================

            // Grouping comparator used to form the reduce-side groups.
            job.setGroupingComparatorClass(WeatherGroupingComparator.class);

            job.setReducerClass(WeatherReduce.class);

            // Input path.
            Path inputPath = new Path("/data/weather/input/");
            FileInputFormat.setInputPaths(job, inputPath);

            // Output path.
            // NOTE(review): this path is relative while the input path is absolute;
            // confirm whether "/data/weather/output" was intended.
            Path outputPath = new Path("data/weather/output");
            // If the output path already exists, delete it recursively.
            if (outputPath.getFileSystem(configuration).exists(outputPath)) {
                outputPath.getFileSystem(configuration).delete(outputPath, true);
            }
            FileOutputFormat.setOutputPath(job, outputPath);

            // Use two reduce tasks.
            job.setNumReduceTasks(2);

            // Submit only after everything above is configured, then wait for completion.
            boolean succeeded = job.waitForCompletion(true);
            if (!succeeded) {
                LOG.error("Weather job failed");
                System.exit(1);
            }
        }

    }
    

    自定义Mapper输出的key对象

    WeatherMapOutputKeyClass.class

    package icu.shaoyayu.hadoop.weather.entity;
    
    import org.apache.hadoop.io.WritableComparable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    /**
     * @author 邵涯语
     * @date 2020/4/17 17:36
     * @Version :
     */
    /**
     * Composite map-output key holding a date (year, month, day) and the
     * temperature recorded on that date.
     *
     * <p>The natural ordering ({@link #compareTo}) is ascending by year, month and
     * day and ignores the temperature. {@link #equals} and {@link #hashCode} are
     * defined over the same three fields so that they stay consistent with the
     * natural ordering and give a stable, value-based hash instead of the
     * identity hash inherited from {@link Object}.
     */
    public class WeatherMapOutputKeyClass implements WritableComparable<WeatherMapOutputKeyClass> {

        private int year;
        private int month;
        private int day;
        private int temperature;

        /** @return the year component */
        public int getYear() {
            return year;
        }

        /** @param year the year component */
        public void setYear(int year) {
            this.year = year;
        }

        /** @return the month component */
        public int getMonth() {
            return month;
        }

        /** @param month the month component */
        public void setMonth(int month) {
            this.month = month;
        }

        /** @return the day component */
        public int getDay() {
            return day;
        }

        /** @param day the day component */
        public void setDay(int day) {
            this.day = day;
        }

        /** @return the temperature carried by this key */
        public int getTemperature() {
            return temperature;
        }

        /** @param temperature the temperature carried by this key */
        public void setTemperature(int temperature) {
            this.temperature = temperature;
        }

        /**
         * Natural ordering: ascending by year, then month, then day.
         *
         * @param keyClass the key to compare with
         * @return a negative, zero or positive value as this key is before, equal to
         *         or after {@code keyClass}
         */
        @Override
        public int compareTo(WeatherMapOutputKeyClass keyClass) {
            int sizeDetermination = Integer.compare(this.year, keyClass.year);
            if (sizeDetermination != 0) {
                return sizeDetermination;
            }
            // Years are equal: decide on the month.
            sizeDetermination = Integer.compare(this.month, keyClass.month);
            if (sizeDetermination != 0) {
                return sizeDetermination;
            }
            // Months are equal as well: decide on the day.
            return Integer.compare(this.day, keyClass.day);
        }

        /**
         * Two keys are equal when they denote the same date; the temperature is
         * ignored, matching {@link #compareTo}.
         *
         * @param other the object to compare with
         * @return {@code true} if {@code other} is a key with the same year, month and day
         */
        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof WeatherMapOutputKeyClass)) {
                return false;
            }
            WeatherMapOutputKeyClass that = (WeatherMapOutputKeyClass) other;
            return this.year == that.year && this.month == that.month && this.day == that.day;
        }

        /**
         * Value-based hash over year, month and day (the fields used by {@link #equals}).
         *
         * @return the hash code of this key
         */
        @Override
        public int hashCode() {
            int result = year;
            result = 31 * result + month;
            result = 31 * result + day;
            return result;
        }

        /**
         * Serializes the four fields in the order year, month, day, temperature.
         *
         * @param out the sink to write to
         * @throws IOException if writing fails
         */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(this.year);
            out.writeInt(this.month);
            out.writeInt(this.day);
            out.writeInt(this.temperature);
        }

        /**
         * Deserializes the four fields in the same order {@link #write} emits them.
         *
         * @param in the source to read from
         * @throws IOException if reading fails
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            this.year = in.readInt();
            this.month = in.readInt();
            this.day = in.readInt();
            this.temperature = in.readInt();
        }

        /** @return the date as {@code year-month-day}; the temperature is not included */
        @Override
        public String toString() {
            return year +"-"+ month +"-"+ day ;
        }
    }
    

    自定义Mapper类

    WeatherMapper.class

    package icu.shaoyayu.hadoop.weather.mapper;
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapTask;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.util.StringUtils;
    
    import java.io.IOException;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    
    /**
     * @author 邵涯语
     * @date 2020/4/18 11:14
     * @Version :
     * 默认的输入格式化类还是TextInputFormat
     */
    /**
     * Mapper that turns one tab-separated weather line into a
     * ({@link WeatherMapOutputKeyClass}, temperature) pair.
     *
     * <p>The first field is a {@code yyyy-MM-dd} date and the last field is the
     * temperature, for example:
     * <pre>
     * 2000-01-01	16	29
     * 2000-01-06	14	-4
     * </pre>
     * The input format is still the default TextInputFormat.
     */
    public class WeatherMapper extends Mapper<LongWritable, Text, WeatherMapOutputKeyClass, IntWritable> {

        WeatherMapOutputKeyClass mWeatherKeyClass = new WeatherMapOutputKeyClass();
        IntWritable mLatitudeValue =  new IntWritable();

        // SimpleDateFormat is not thread-safe, so it is kept per mapper instance
        // (not static) and reused across records instead of being rebuilt each time.
        private final SimpleDateFormat mDateFormat = new SimpleDateFormat("yyyy-MM-dd");

        /**
         * Parses one input line and emits the date/temperature key with the
         * temperature as value. Lines that cannot be parsed are skipped and counted
         * in the {@code WeatherMapper/MALFORMED_RECORDS} counter.
         *
         * @param key     offset of the line in the input (unused)
         * @param value   the line to parse
         * @param context the task context used to emit the output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the actual record on tabs (no escaping involved).
            String[] sts = StringUtils.split(value.toString(), '\t');
            if (sts.length < 2) {
                // Need at least a date field and a temperature field.
                context.getCounter("WeatherMapper", "MALFORMED_RECORDS").increment(1);
                return;
            }

            try {
                Date date = mDateFormat.parse(sts[0]);
                Calendar cal = Calendar.getInstance();
                cal.setTime(date);
                // Fill in the date part of the key; Calendar months are zero-based.
                mWeatherKeyClass.setYear(cal.get(Calendar.YEAR));
                mWeatherKeyClass.setMonth(cal.get(Calendar.MONTH)+1);
                mWeatherKeyClass.setDay(cal.get(Calendar.DAY_OF_MONTH));
                int temperature = parseTemperature(sts[sts.length-1]);
                mWeatherKeyClass.setTemperature(temperature);
                mLatitudeValue.set(temperature);
                // Emit the record.
                context.write(mWeatherKeyClass,mLatitudeValue);
            } catch (ParseException | NumberFormatException e) {
                // Bad date or bad temperature: skip this record but keep it visible.
                context.getCounter("WeatherMapper", "MALFORMED_RECORDS").increment(1);
            }

        }

        /**
         * Parses a temperature field, accepting both a plain integer ("29", "-4")
         * and an integer followed by a single unit character ("29c").
         *
         * @param raw the raw field text
         * @return the parsed temperature
         * @throws NumberFormatException if the field does not contain an integer
         */
        private static int parseTemperature(String raw) {
            String text = raw.trim();
            // Only drop the last character when it is not part of the number.
            if (!text.isEmpty() && !Character.isDigit(text.charAt(text.length() - 1))) {
                text = text.substring(0, text.length() - 1);
            }
            return Integer.parseInt(text);
        }
    }
    

    自定义分区器

    WeatherPartitioner.class

    package icu.shaoyayu.hadoop.weather.util;
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Partitioner;
    
    /**
     * @author 邵涯语
     * @date 2020/4/18 12:00
     * @Version :
     */
    /**
     * Partitioner that routes every record of the same year and month to the same
     * partition, so that one reducer sees a whole month.
     */
    public class WeatherPartitioner extends Partitioner<WeatherMapOutputKeyClass, IntWritable> {

        /**
         * Chooses the partition from the key's year and month only.
         *
         * @param keyClass      the map output key
         * @param intWritable   the map output value (unused)
         * @param numPartitions the number of partitions
         * @return a partition index in {@code [0, numPartitions)}
         */
        @Override
        public int getPartition(WeatherMapOutputKeyClass keyClass, IntWritable intWritable, int numPartitions) {
            int monthHash = keyClass.getYear() * 31 + keyClass.getMonth();
            // Clear the sign bit so the remainder can never be negative.
            return (monthHash & Integer.MAX_VALUE) % numPartitions;
        }
    }
    

    自定义一个排序类

    WeatherSortComparator.class

    package icu.shaoyayu.hadoop.weather.util;
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    /**
     * @author 邵涯语
     * @date 2020/4/18 12:08
     * @Version :
     */
    /**
     * Sort comparator for {@link WeatherMapOutputKeyClass}: ascending by year and
     * month, then descending by temperature.
     */
    public class WeatherSortComparator extends WritableComparator {

        /**
         * Registers the key class and asks the base class to create key instances.
         */
        public WeatherSortComparator(){
            super(WeatherMapOutputKeyClass.class,true);
        }

        /**
         * Orders two keys by year and month ascending, then temperature descending.
         *
         * @param a the first key
         * @param b the second key
         * @return a negative, zero or positive value as {@code a} sorts before,
         *         with or after {@code b}
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            WeatherMapOutputKeyClass left = (WeatherMapOutputKeyClass) a;
            WeatherMapOutputKeyClass right = (WeatherMapOutputKeyClass) b;

            // Year decides first.
            int byYear = Integer.compare(left.getYear(), right.getYear());
            if (byYear != 0) {
                return byYear;
            }

            // Same year: month decides.
            int byMonth = Integer.compare(left.getMonth(), right.getMonth());
            if (byMonth != 0) {
                return byMonth;
            }

            // Same month: higher temperature first (operands swapped for descending order).
            return Integer.compare(right.getTemperature(), left.getTemperature());
        }
    }
    

    自定义一个分组器

    WeatherGroupingComparator.class

    package icu.shaoyayu.hadoop.weather.util;
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    /**
     * @author 邵涯语
     * @date 2020/4/18 13:00
     * @Version :
     */
    /**
     * Grouping comparator for {@link WeatherMapOutputKeyClass}: two keys compare
     * as equal when their year and month match.
     */
    public class WeatherGroupingComparator extends WritableComparator {

        /**
         * Registers the key class and asks the base class to create key instances.
         */
        public WeatherGroupingComparator(){
            super(WeatherMapOutputKeyClass.class,true);
        }

        /**
         * Compares two keys by year, then by month; day and temperature are ignored.
         *
         * @param a the first key
         * @param b the second key
         * @return zero when both keys share year and month, otherwise the sign of
         *         the first differing field
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            WeatherMapOutputKeyClass left = (WeatherMapOutputKeyClass) a;
            WeatherMapOutputKeyClass right = (WeatherMapOutputKeyClass) b;

            int byYear = Integer.compare(left.getYear(), right.getYear());
            // Fall through to the month only when the years are identical.
            return byYear != 0
                    ? byYear
                    : Integer.compare(left.getMonth(), right.getMonth());
        }
    }
    

    自定义一个Reduce

    package icu.shaoyayu.hadoop.weather.reduce;
    
    
    import icu.shaoyayu.hadoop.weather.entity.WeatherMapOutputKeyClass;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * @author 邵涯语
     * @date 2020/4/18 13:08
     * @Version :
     */
    /**
     * Reducer that writes at most two records per group: the first record of the
     * group and the first later record whose day differs from it.
     */
    public class WeatherReduce extends Reducer<WeatherMapOutputKeyClass, IntWritable, Text, IntWritable> {

        Text mRKey = new Text();
        IntWritable mRValue = new IntWritable();

        /**
         * Walks the values of one group and emits the key's date and temperature
         * for the first record and for the first following record on another day.
         * The values themselves are not read; the date and temperature are taken
         * from {@code key} on every iteration.
         *
         * @param key     the group key
         * @param values  the values of the group
         * @param context the task context used to emit the output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(WeatherMapOutputKeyClass key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            boolean firstEmitted = false;
            int firstDay = 0;
            for (IntWritable ignored : values) {
                if (!firstEmitted) {
                    // First record of the group: remember its day and emit it.
                    mRKey.set(key.toString());
                    mRValue.set(key.getTemperature());
                    firstEmitted = true;
                    firstDay = key.getDay();
                    context.write(mRKey, mRValue);
                    continue;
                }
                if (key.getDay() != firstDay) {
                    // First record on a different day: emit it and stop.
                    mRKey.set(key.toString());
                    mRValue.set(key.getTemperature());
                    context.write(mRKey, mRValue);
                    break;
                }
            }
        }
    }
    
    记得加油学习哦^_^
  • 相关阅读:
    Animation用法
    英文口语及书写常用句型汇总1
    Jqplot使用总结之二(双Y轴)
    SqlServer扩展存储过程
    SQL Server常见基础操作
    C# 利用ITextSharp导出PDF文件
    go常量
    ARP协议
    go数组
    go基本数据类型
  • 原文地址:https://www.cnblogs.com/shaoyayu/p/13434001.html
Copyright © 2020-2023  润新知