• Hadoop Total Order Sorting


    1. No custom partitioner: sort by key

    1. Mapper: both the output key and value are IntWritable

    package com.cr.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class MaxTempMapper extends Mapper<LongWritable,Text,IntWritable,IntWritable>{
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    
            // each input line is "<year> <temperature>", separated by a single space
            String[] arr = value.toString().split(" ");
            context.write(new IntWritable(Integer.parseInt(arr[0])),new IntWritable(Integer.parseInt(arr[1])));
    
        }
    }
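
    For reference, the mapper assumes each input line is a year and a temperature separated by a single space; the first token becomes the output key and the second the output value. A hypothetical input fragment (values made up only for illustration, consistent with the output shown later) would look like:

    1995 15
    2000 13
    2015 45
    2018 34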
    
    

    2. Reducer: input and output key/value types are all IntWritable

    package com.cr.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class MaxTempReducer extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable> {
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    
            // keep the maximum temperature seen for this year (the key)
            int max = Integer.MIN_VALUE;
            for (IntWritable iw : values) {
                max = Math.max(max, iw.get());
            }
            context.write(key,new IntWritable(max));
    
        }
    }
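
    A side note: because taking a maximum is commutative and associative, this same class can also be registered as a combiner, so each map task pre-aggregates its local maximum before the shuffle. That is optional and not part of the original job; in the driver shown next it would be a single extra line:

    job.setCombinerClass(MaxTempReducer.class);   // optional: local max per map task before the shuffle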
    

    3. MaxTempApp: the job driver

    package com.cr.wordcount;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    public class MaxTempApp {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            // create the job
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS","file:///");               // run against the local file system
            Job job = Job.getInstance(conf);
    
            // configure the job
            job.setJobName("MaxTempApp");                      // job name
            job.setJarByClass(MaxTempApp.class);               // class used to locate the job jar
            job.setInputFormatClass(TextInputFormat.class);
    
            // input path
            FileInputFormat.addInputPath(job,new Path((args[0])));
            // output path
            FileOutputFormat.setOutputPath(job,new Path(args[1]));
    
            job.setMapperClass(MaxTempMapper.class);           // mapper class
            job.setReducerClass(MaxTempReducer.class);         // reducer class
            job.setNumReduceTasks(1);                          // number of reduce tasks
    
            job.setMapOutputKeyClass(IntWritable.class);       // map output key type
            job.setMapOutputValueClass(IntWritable.class);     // map output value type
            job.setOutputKeyClass(IntWritable.class);          // job output key type
            job.setOutputValueClass(IntWritable.class);        // job output value type
            job.waitForCompletion(true);
        }
    }
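
    Note that args[0] is the input path and args[1] the output path; because fs.defaultFS is set to file:///, the job runs against the local file system, and the output directory must not already exist or FileOutputFormat will fail the job. waitForCompletion returns whether the job succeeded, so a common (optional) driver idiom is to propagate that as the process exit code:

    System.exit(job.waitForCompletion(true) ? 0 : 1);   // exit non-zero if the job failed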
    

    4. Output sorted by key in ascending order (the single reducer receives every key, and the shuffle sorts them)

    1995	15
    2000	13
    2015	45
    2018	34
    

    2. Custom partitioning

    1. Define the partitioner

    package com.cr.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Partitioner;
    
    public class MyPartitoner extends Partitioner<IntWritable,IntWritable> {
    
    
        @Override
        public int getPartition(IntWritable year, IntWritable temp, int parts) {
            // bucket the years into three contiguous ranges so that
            // partition order matches key order across the reducers
            int tmp = year.get() - 1995;
            if (tmp < 8) {              // 1995 - 2002
                return 0;
            } else if (tmp < 16) {      // 2003 - 2010
                return 1;
            } else {                    // 2011 and later
                return 2;
            }
        }
    }
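
    Why a range-based partitioner? The default HashPartitioner spreads keys by hash code, so each reducer's output file is sorted internally but the key ranges of the files overlap, which breaks the global order. For contrast, its core logic is essentially:

    // Hadoop's built-in HashPartitioner: keys are spread by hash, so key
    // ranges are NOT contiguous across reducers
    public class HashPartitioner<K, V> extends Partitioner<K, V> {
        public int getPartition(K key, V value, int numReduceTasks) {
            return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
        }
    }

    MyPartitoner instead maps contiguous year ranges to increasing partition numbers, so partition order agrees with key order and the concatenated part files stay globally sorted.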

    2. Register the partitioner and raise the number of reducers to match the partition count

    package com.cr.wordcount;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    public class MaxTempApp {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            // create the job
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS","file:///");               // run against the local file system
            Job job = Job.getInstance(conf);
    
            // configure the job
            job.setJobName("MaxTempApp");                      // job name
            job.setJarByClass(MaxTempApp.class);               // class used to locate the job jar
            job.setInputFormatClass(TextInputFormat.class);
    
            // input path
            FileInputFormat.addInputPath(job,new Path((args[0])));
            // output path
            FileOutputFormat.setOutputPath(job,new Path(args[1]));
    
            job.setPartitionerClass(MyPartitoner.class);       // register the custom partitioner
            job.setMapperClass(MaxTempMapper.class);           // mapper class
            job.setReducerClass(MaxTempReducer.class);         // reducer class
            job.setNumReduceTasks(3);                          // one reduce task per partition
    
            job.setMapOutputKeyClass(IntWritable.class);       // map output key type
            job.setMapOutputValueClass(IntWritable.class);     // map output value type
            job.setOutputKeyClass(IntWritable.class);          // job output key type
            job.setOutputValueClass(IntWritable.class);        // job output value type
            job.waitForCompletion(true);
        }
    }
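
    Keep in mind that getPartition must return a value in [0, numReduceTasks), which is why setNumReduceTasks(3) has to match the three hard-coded buckets. A slightly more defensive variant of the method (a sketch, not part of the original code) derives the bucket the same way but clamps it to whatever reducer count the job actually uses:

    @Override
    public int getPartition(IntWritable year, IntWritable temp, int parts) {
        int bucket = (year.get() - 1995) / 8;            // 8-year-wide buckets starting at 1995
        return Math.min(Math.max(bucket, 0), parts - 1); // clamp into [0, parts)
    }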
    

    3. Each key range lands in its own partition; since every part file is itself sorted and the ranges are ordered, concatenating part-r-00000 through part-r-00002 yields a globally sorted (total order) result


    part-r-00000

    1995	15
    1996	23
    1997	234
    1998	43
    1999	32
    2000	13
    2001	45
    2002	32

    part-r-00001

    2003	23
    2004	12
    2005	23
    2006	45
    

    part-r-00002

    2011	38
    2015	45
    2018	34
    2021	45
    2024	12
    2033	345
    
