环境
虚拟机:VMware 10
Linux版本:CentOS-6.5-x86_64
客户端:Xshell4
FTP:Xftp4
jdk8
hadoop-3.1.1
找出每个月气温最高的2天
1949-10-01 14:21:02 34c 1949-10-01 19:21:02 38c 1949-10-02 14:01:02 36c 1950-01-01 11:21:02 32c 1950-10-01 12:21:02 37c 1951-12-01 12:21:02 23c 1950-10-02 12:21:02 41c 1950-10-03 12:21:02 27c 1951-07-01 12:21:02 45c 1951-07-02 12:21:02 46c 1951-07-03 12:21:03 47c
package test.mr.tq; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * @author Administrator * 客户端 */ public class MyTQ { /** * 找出每个月气温最高的2天 * @param args */ public static void main(String[] args) { //加载配置文件 Configuration conf = new Configuration(); try { //创建客户端 Job job = Job.getInstance(conf,"tian qi"); job.setJarByClass(MyTQ.class); //Map job.setMapperClass(TQMapper.class); job.setOutputKeyClass(TQ.class); job.setOutputValueClass(IntWritable.class); //分区类 处理大数据量均衡并发处理 job.setPartitionerClass(TqPartitioner.class); //用于buffer字节数组内的key排序的比较类 温度最高的2天 需要排序 job.setSortComparatorClass(TqSortComparator.class); //Reduce job.setReducerClass(TqReducer.class); job.setNumReduceTasks(2); //用于分组的比较类 年月相同的被视为一组 job.setGroupingComparatorClass(TqGroupingComparator.class); //输入 输出 Path input = new Path("/root/input"); FileInputFormat.addInputPath(job, input); Path output = new Path("/root/output"); if (output.getFileSystem(conf).exists(output)) { output.getFileSystem(conf).delete(output, true); } FileOutputFormat.setOutputPath(job, output); //提交 System.exit(job.waitForCompletion(true) ? 0 : 1); } catch (Exception e) { e.printStackTrace(); } } }
package test.mr.tq;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key: one temperature reading identified by its date.
 *
 * <p>Serialization order is year, month, day, wd — {@link #write} and
 * {@link #readFields} must stay in lockstep. Natural ordering is by date
 * ascending only; temperature ordering is handled by {@code TqSortComparator}.
 */
public class TQ implements WritableComparable<TQ> {

    private int year;
    private int month;
    private int day;
    private int wd; // temperature ("wen du")

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public int getWd() {
        return wd;
    }

    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Deserializes the four fields in the exact order {@link #write} emitted them.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        year = in.readInt();
        month = in.readInt();
        day = in.readInt();
        wd = in.readInt();
    }

    /**
     * Serializes the four fields; order must match {@link #readFields}.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeInt(wd);
    }

    /**
     * Orders keys chronologically (year, then month, then day); the
     * temperature field does not participate in the natural ordering.
     */
    @Override
    public int compareTo(TQ that) {
        int byYear = Integer.compare(year, that.getYear());
        if (byYear != 0) {
            return byYear;
        }
        int byMonth = Integer.compare(month, that.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }
        return Integer.compare(day, that.getDay());
    }
}
package test.mr.tq;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Reduce-side grouping comparator: two keys belong to the same reduce group
 * iff they share the same year AND month. Day and temperature are ignored,
 * so all readings of one month arrive in a single reduce() call.
 */
public class TqGroupingComparator extends WritableComparator {

    public TqGroupingComparator() {
        // true => instantiate TQ keys so compare() receives deserialized objects.
        super(TQ.class, true);
    }

    /**
     * Returns 0 (same group) when year and month match; otherwise a
     * year-first, month-second ordering.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }
        return Integer.compare(left.getMonth(), right.getMonth());
    }
}
package test.mr.tq;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Maps one input line ("yyyy-MM-dd HH:mm:ss NNc") to a (TQ, temperature) pair.
 *
 * <p>Key/value design for the sample input:
 * <pre>
 *   1949-10-01 14:21:02 34c   -&gt; TQ{1949,10,1,34} : 34
 *   1951-07-03 12:21:03 47c   -&gt; TQ{1951,7,3,47}  : 47
 * </pre>
 *
 * <p>Improvement over the original: the date is a fixed "yyyy-MM-dd" prefix,
 * so it is parsed directly with split/parseInt instead of allocating a
 * SimpleDateFormat, Date and Calendar for every record (legacy, per-record
 * garbage). Malformed lines are still logged and skipped, as before.
 */
public class TQMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {

    // Reused across map() calls to avoid per-record allocation; Hadoop
    // serializes the contents on context.write(), so mutation is safe.
    TQ tq = new TQ();
    IntWritable vwd = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        try {
            // e.g. "1951-07-03 12:21:03 47c"
            // StringUtils.split collapses runs of separators, so strs is
            // {date, time, temperature} even with tab/multi-space input.
            String[] strs = StringUtils.split(value.toString(), " \t");
            String[] date = strs[0].split("-");

            // key: calendar date of the reading
            tq.setYear(Integer.parseInt(date[0]));
            tq.setMonth(Integer.parseInt(date[1]));
            tq.setDay(Integer.parseInt(date[2]));

            // temperature: numeric part of e.g. "47c" (trailing unit dropped)
            String last = strs[strs.length - 1];
            int wd = Integer.parseInt(last.substring(0, last.length() - 1));
            tq.setWd(wd);

            // value: same temperature, as the reduce-side payload
            vwd.set(wd);

            context.write(tq, vwd);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            // Same best-effort policy as the original (which swallowed
            // ParseException): log the bad record and keep going.
            e.printStackTrace();
        }
    }
}
package test.mr.tq;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Routes each key to a reducer by year, so one year's data stays together
 * while different years spread across reducers (balances load, avoids skew).
 *
 * @author wjy
 */
public class TqPartitioner extends Partitioner<TQ, IntWritable> {

    /**
     * Returns the partition for {@code key}, always in [0, numPartitions).
     *
     * <p>Uses {@link Math#floorMod} instead of {@code %}: the remainder
     * operator yields a negative (illegal) partition for a negative year,
     * which would fail the task. For the non-negative years in this dataset
     * the result is identical to the original {@code year % numPartitions}.
     */
    @Override
    public int getPartition(TQ key, IntWritable value, int numPartitions) {
        return Math.floorMod(key.getYear(), numPartitions);
    }
}
package test.mr.tq;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Emits the two hottest DISTINCT days of each month group.
 *
 * <p>Each reduce() call receives one month's readings (grouped by
 * TqGroupingComparator), already ordered date-ascending / temperature-
 * descending by TqSortComparator, so the first record is that month's
 * hottest reading and the first record with a different day is the
 * second-hottest distinct day.
 */
public class TqReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {

    // Reused output holders to avoid per-call allocation.
    Text rkey = new Text();
    IntWritable rval = new IntWritable();

    @Override
    protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // All values with "equal" keys (same year+month) form one group.
        // Within the group: date ascending, temperature descending, e.g.
        //   1970 01 01 40
        //   1970 01 02 38
        // NOTE: as we iterate `values`, Hadoop deserializes each underlying
        // record into the SAME `key` object, so key's day/wd change per step.
        int flg = 0; // 0 until the month's hottest day has been written
        int day = 0; // day-of-month of the first (hottest) record written
        for (IntWritable wd : values) {
            if (flg == 0) {
                // First record of the group = hottest reading of the month.
                day = key.getDay();
                rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
                rval.set(key.getWd());//wd.get()
                context.write(rkey, rval);
                flg ++;
            }
            // First record on a DIFFERENT day = second-hottest distinct day
            // (same-day duplicates are skipped by the day != check).
            if (flg != 0 && day != key.getDay()) {
                rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
                rval.set(key.getWd());//wd.get()
                context.write(rkey, rval);
                break; // two days emitted — done with this month
            }
        }
    }
}
package test.mr.tq; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; public class TqSortComparator extends WritableComparator { //对字节数据中map进行排序 所以需要先将Key反序列化为对象 然后再进行比较 public TqSortComparator() { super(TQ.class,true); } /** * 按照时间正序 温度倒序对字节数组排序 */ @Override public int compare(WritableComparable a, WritableComparable b) { TQ t1 = (TQ)a; TQ t2 = (TQ)b; int y = Integer.compare(t1.getYear(), t2.getYear()); if (y==0) { int m = Integer.compare(t1.getMonth(), t2.getMonth()); if (m == 0) { //前面加一个负号 就可以实现倒序的效果 return - Integer.compare(t1.getWd(), t2.getWd()); } return m; } return y; } }