1. Implementing a Simple MapReduce Job
- The map function prepares the data; the reduce function processes it.
Add the POM dependencies:
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.7.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.1</version>
    </dependency>
</dependencies>
input.txt: a simplified data source
1950 0
1950 22
1950 -11
1949 111
1949 78
1898 2222
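For reference, running the finished job over this input should produce one maximum per year; the default TextOutputFormat separates key and value with a tab, and keys come out of the shuffle sorted:

1898	2222
1949	111
1950	22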
MaxTemperatureMapper.java: input handling
package test.chapter02;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author baize
 * @description Mapper for finding the highest temperature;
 * the map function prepares the data, the reduce function processes it;
 * Mapper's four type parameters are: input key / input value / output key / output value
 * @Date 2019/12/31 19:27
 */
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Simple parsing: the first four characters are the year,
        // everything after the separating space is the temperature
        String line = value.toString();
        String year = line.substring(0, 4);
        int airTemperature = Integer.parseInt(line.substring(5).trim());
        // Emit a (year, temperature) key-value pair
        context.write(new Text(year), new IntWritable(airTemperature));
    }
}
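A quick way to sanity-check the mapper without a cluster is MRUnit. This is a minimal sketch, assuming the extra test dependencies org.apache.mrunit:mrunit (hadoop2 classifier) and JUnit, neither of which is in the POM above:

package test.chapter02;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

import java.io.IOException;

public class MaxTemperatureMapperTest {
    @Test
    public void parsesYearAndTemperature() throws IOException {
        // Feed one input line and assert the mapper emits (year, temperature)
        new MapDriver<LongWritable, Text, Text, IntWritable>()
                .withMapper(new MaxTemperatureMapper())
                .withInput(new LongWritable(0), new Text("1950 -11"))
                .withOutput(new Text("1950"), new IntWritable(-11))
                .runTest();
    }
}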
MaxTemperatureReducer.java: output handling
package test.chapter02;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @author baize
 * @description Reducer that computes the highest temperature
 * @Date 2020/1/1 16:40
 */
public class MaxTemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Collapse one key with many values into one key with a single value,
        // keeping only the maximum temperature
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable temperature : values) {
            maxValue = Integer.max(temperature.get(), maxValue);
        }
        context.write(key, new IntWritable(maxValue));
    }
}
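The reducer can be checked the same way; under the same MRUnit/JUnit assumptions, a sketch:

package test.chapter02;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;

public class MaxTemperatureReducerTest {
    @Test
    public void returnsMaximumValue() throws IOException {
        // Several values for one key should reduce to the single maximum
        new ReduceDriver<Text, IntWritable, Text, IntWritable>()
                .withReducer(new MaxTemperatureReducer())
                .withInput(new Text("1950"),
                        Arrays.asList(new IntWritable(0), new IntWritable(22), new IntWritable(-11)))
                .withOutput(new Text("1950"), new IntWritable(22))
                .runTest();
    }
}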
Main.java: MapReduce job driver
package test.chapter02;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @author baize
 * @description Runs the MapReduce job
 * @Date 2019/12/31 19:48
 */
public class Main {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            System.err.println("Usage: Main <input path> <output path>");
            System.exit(-1);
        }
        // 1. The Job object specifies the whole job; setJarByClass tells Hadoop
        //    which jar to ship to the cluster when the job runs
        Job job = Job.getInstance();
        job.setJarByClass(Main.class);
        job.setJobName("Max temperature");
        // 2. After constructing the Job, specify the input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 3. Specify the map and reduce classes
        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);
        // 4. Declare the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5. waitForCompletion submits the job and waits for it to finish
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
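One optional tweak, not part of the original driver: because taking a maximum is commutative and associative, the reducer class can double as a combiner, shrinking the data shuffled between the map and reduce phases. A single extra line in the driver (alongside the setMapperClass/setReducerClass calls) enables it:

// Optional: reuse the reducer as a combiner, since max is commutative and associative
job.setCombinerClass(MaxTemperatureReducer.class);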
TODO
- Package the project into a jar with the Maven command
- Run it on Hadoop and inspect the output (see the sketch below)
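A minimal sketch of both TODO steps; the jar name target/chapter02-1.0.jar is an assumption (it depends on your POM's artifactId and version), and input.txt is assumed to already be on HDFS:

mvn clean package
hadoop jar target/chapter02-1.0.jar test.chapter02.Main input.txt output
hdfs dfs -cat output/part-r-00000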