02Hadoop二次排序2

案例：

数据：

邮编（此处实际为股票代码） | 日期 | 金额

ILMN,2013-12-05,97.65
GOOD,2013-12-09,1078.14
IBM,2013-12-09,177.46
ILMN,2013-12-09,101.33
ILMN,2013-12-06,99.25
GOOD,2013-12-06,1069.87
IBM,2013-12-06,177.67
GOOD,2013-12-05,1057.34
GOOD,2013-12-05,10.23
GOOD,2013-12-05,11.43
GOOD,2013-12-05,17.34

要求：把同一个邮编的放在一起，然后根据日期和金额降序排列。

效果如下：

思路：在map阶段，构造的key（CompositeKey）是：（邮编，日期，金额）；value（NaturalValue）是（日期，金额）。然后key实现

WritableComparable接口，实现比较函数，这样就可以保证同一个分区内的数据是有序的。

然后在shuffle过程中，指定一个key比较器(CompositeKeyComparator),使得在聚合过程后，对key按照先邮编，再时间，最后金额的顺序排序，key-value是键值对，key按照我们的意愿排好序了，

value也就排好了。

总的来说：降序什么的都是CompositeKeyComparator来决定的。

代码结构：

（1）key:组合键

  1 package com.book.test1;
  2 import java.io.DataInput;
  3 import java.io.DataOutput;
  4 import java.io.IOException;
  5 
  6 import org.apache.hadoop.io.DoubleWritable;
  7 import org.apache.hadoop.io.LongWritable;
  8 import org.apache.hadoop.io.Text;
  9 import org.apache.hadoop.io.Writable;
 10 import org.apache.hadoop.io.WritableComparable;
 11 /**
 12  * 这个的作用就是要数据在分区里面有序
 13  */
 14 /**
 15  * 定义组合键:就是可以把自己要比较的字段写入
 16  * @author Sxq
 17  *
 18  */
 19 //必须要时间这个WritableComparable这个类
 20 public class CompositeKey implements Writable, WritableComparable<CompositeKey> {
 21 
 22     // 股票的名字
 23     private Text stockSymbol;
 24     // 日期
 25     private LongWritable timestamp;
 26     private DoubleWritable price;
 27 
 28     
 29     public DoubleWritable getPrice() {
 30         return price;
 31     }
 32     public void setPrice(DoubleWritable price) {
 33         this.price = price;
 34     }
 35     public CompositeKey()
 36     {
 37         
 38     }
 39     public CompositeKey(Text _stockSymbol, LongWritable _timestamp,DoubleWritable _price) {
 40         this.stockSymbol = _stockSymbol;
 41         this.timestamp = _timestamp;
 42         this.price=_price;
 43     }
 44 
 45     
 46 
 47     public Text getStockSymbol() {
 48         return stockSymbol;
 49     }
 50 
 51 
 52     public void setStockSymbol(Text stockSymbol) {
 53         this.stockSymbol = stockSymbol;
 54     }
 55 
 56 
 57 
 58     public LongWritable getTimestamp() {
 59         return timestamp;
 60     }
 61 
 62 
 63 
 64     public void setTimestamp(LongWritable timestamp) {
 65         this.timestamp = timestamp;
 66     }
 67 
 68 
 69 
 70     //读出
 71     public void readFields(DataInput input) throws IOException {
 72         String value1=input.readUTF();
 73         long value2=input.readLong();
 74       this.stockSymbol=new  Text( value1);
 75       this.timestamp=  new LongWritable(value2);
 76       this.price=new DoubleWritable(input.readDouble());
 77     }
 78     
 79    //写入
 80     
 81     //@Override
 82     public void write(DataOutput output) throws IOException {
 83         output.writeUTF(this.stockSymbol.toString());
 84         output.writeLong(this.timestamp.get());
 85         output.writeDouble(this.price.get());
 86     }
 87     
 88     public int compareTo(CompositeKey other) {
 89         
 90            int comparator=this.stockSymbol.compareTo(other.stockSymbol);
 91             if(comparator==0)
 92             {
 93                 comparator=this.timestamp.compareTo(other.timestamp);
 94             }
 95         
 96         //升序
 97         //return comparator;
 98     
 99     return -comparator;
100     }
101 
102 
103     @Override
104     public String toString() {
105         return "CompositeKey [stockSymbol=" + stockSymbol + ", timestamp=" + timestamp + "]";
106     }
107     
108 }

(2)key对应的value：

package com.book.test1;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class NaturalValue implements Writable {
private long timestamp;
private double privce;


public long getTimestamp() {
    return timestamp;
}

public void setTimestamp(long timestamp) {
    this.timestamp = timestamp;
}

public double getPrivce() {
    return privce;
}

public void setPrivce(double privce) {
    this.privce = privce;
}



public void readFields(DataInput input) throws IOException {
    this.timestamp=input.readLong();
    this.privce=input.readDouble();
    
    
    
    
}

public void write(DataOutput output) throws IOException {
    
    
    output.writeLong(this.timestamp);
    output.writeDouble(this.privce);
    
}

    
    
    
    
}

（3）分区器：

NaturalKeyPartitioner

package com.book.test1;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Partitioner that looks only at the stock symbol of the composite key, so
 * that all records sharing a symbol end up in the same partition.
 *
 * @author Sxq
 */
public class NaturalKeyPartitioner extends Partitioner<CompositeKey, NaturalValue> {

    /**
     * Chooses a partition from the hash of the key's stock symbol.
     *
     * @param key           composite key; only its stock symbol is used
     * @param value         ignored
     * @param numPartitions number of partitions
     * @return the absolute value of {@code hash % numPartitions}
     */
    @Override
    public int getPartition(CompositeKey key, NaturalValue value, int numPartitions) {
        final int symbolHash = key.getStockSymbol().hashCode();
        final int remainder = symbolHash % numPartitions;
        // The remainder carries the sign of the hash; fold it to non-negative.
        return remainder < 0 ? -remainder : remainder;
    }

}

（4）把key排序的比较器：在shuffle过程中用到的

package com.book.test1;

import javax.print.attribute.standard.MediaSize.Other;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator for {@link CompositeKey}: it orders composite keys so that
 * the keys themselves arrive sorted.
 *
 * <p>Order: stock symbol ascending, then timestamp descending, then price
 * descending. Two keys whose three fields are all equal compare as 0, which
 * the comparator contract requires ({@code compare(a, a) == 0} and
 * {@code sgn(compare(a, b)) == -sgn(compare(b, a))}).
 *
 * @author Sxq
 */
public class CompositeKeyComparator extends WritableComparator {

    public CompositeKeyComparator() {
        // Second argument is WritableComparator's createInstances flag.
        super(CompositeKey.class, true);
    }

    /**
     * Compares two composite keys.
     *
     * @param a first key, must be a {@link CompositeKey}
     * @param b second key, must be a {@link CompositeKey}
     * @return negative, zero or positive as {@code a} sorts before, equal to
     *         or after {@code b}
     */
    @Override
    @SuppressWarnings("rawtypes") // raw types are dictated by the overridden signature
    public int compare(WritableComparable a, WritableComparable b) {
        CompositeKey left = (CompositeKey) a;
        CompositeKey right = (CompositeKey) b;

        int bySymbol = left.getStockSymbol().compareTo(right.getStockSymbol());
        if (bySymbol != 0) {
            return bySymbol;
        }

        // Same symbol: order by date, descending (operands swapped).
        int byDateDescending = right.getTimestamp().compareTo(left.getTimestamp());
        if (byDateDescending != 0) {
            return byDateDescending;
        }

        // Same date: order by price, descending; yields 0 for equal prices.
        return right.getPrice().compareTo(left.getPrice());
    }

    static {
        // Register this comparator for CompositeKey.
        WritableComparator.define(CompositeKey.class, new CompositeKeyComparator());
    }

}

（5）reduce端的分组比较器：

CompositeGroupingComparator

package com.book.test1;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator for the reduce phase: two keys belong to the same group
 * exactly when their stock symbols compare as equal. Timestamp and price are
 * not consulted.
 *
 * @author Sxq
 */
public class CompositeGroupingComparator extends WritableComparator {

    public CompositeGroupingComparator() {
        super(CompositeKey.class, true);
    }

    /**
     * Compares two composite keys by stock symbol only.
     *
     * @param a first key, must be a {@link CompositeKey}
     * @param b second key, must be a {@link CompositeKey}
     * @return the result of comparing the two stock symbols
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final CompositeKey first = (CompositeKey) a;
        final CompositeKey second = (CompositeKey) b;
        return first.getStockSymbol().compareTo(second.getStockSymbol());
    }

}

（6）驱动类：

package com.book.test1;

import java.io.IOException;
import java.util.Date;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver of the secondary-sort job. It wires together the mapper, the reducer,
 * the sort comparator, the grouping comparator and the partitioner, then runs
 * the job.
 *
 * <p>Usage: {@code Cmain [inputPath [outputPath]]}. Arguments that are not
 * given fall back to the built-in default paths.
 */
public class Cmain {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATH = "/Users/mac/Desktop/stock.txt";
    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/mac/Desktop/flowresort";
    /** Counter group and name under which skipped input lines are reported. */
    private static final String COUNTER_GROUP = "Cmain";
    private static final String MALFORMED_RECORDS = "MALFORMED_RECORDS";

    /**
     * Parses one {@code symbol,date,price} line and emits a
     * ({@link CompositeKey}, {@link NaturalValue}) pair.
     */
    static class Map1 extends Mapper<LongWritable, Text, CompositeKey, NaturalValue> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, CompositeKey, NaturalValue>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // Blank lines carry no record.
                return;
            }

            String[] fields = line.split(",");
            if (fields.length < 3) {
                // Too few columns: count the line and skip it instead of failing
                // the task with an ArrayIndexOutOfBoundsException.
                context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                return;
            }

            final long timestamp;
            final double price;
            try {
                timestamp = UtilsCmain.DataTranform(fields[1].trim());
                price = Double.parseDouble(fields[2].trim());
            } catch (IllegalArgumentException e) {
                // NumberFormatException is an IllegalArgumentException: the date
                // or the price is not numeric, so the record is unusable.
                context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                return;
            }

            NaturalValue naturalValue = new NaturalValue();
            naturalValue.setPrivce(price);
            naturalValue.setTimestamp(timestamp);

            CompositeKey compositeKey = new CompositeKey();
            compositeKey.setStockSymbol(new Text(fields[0].trim()));
            compositeKey.setPrice(new DoubleWritable(price));
            compositeKey.setTimestamp(new LongWritable(timestamp));

            context.write(compositeKey, naturalValue);
        }

    }

    /**
     * Concatenates all values of one group as {@code (timestamp,price)} pairs,
     * in the order in which they are delivered, and writes them under the
     * group's stock symbol.
     */
    static class reduce1 extends Reducer<CompositeKey, NaturalValue, Text, Text> {
        @Override
        protected void reduce(CompositeKey key, Iterable<NaturalValue> vlaue,
                Reducer<CompositeKey, NaturalValue, Text, Text>.Context context) throws IOException, InterruptedException {

            // Local, single-threaded accumulation: StringBuilder is sufficient.
            StringBuilder pairs = new StringBuilder();
            for (NaturalValue naturalValue : vlaue) {
                pairs.append("(");
                pairs.append(naturalValue.getTimestamp());
                pairs.append(",").append(naturalValue.getPrivce()).append(")");
            }

            context.write(new Text(key.getStockSymbol()), new Text(pairs.toString()));
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional input path followed by optional output path
     * @throws Exception if the job cannot be set up or run
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Cmain.class);

        job.setMapperClass(Map1.class);
        job.setReducerClass(reduce1.class);

        job.setMapOutputKeyClass(CompositeKey.class);
        job.setMapOutputValueClass(NaturalValue.class);

        job.setOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);

        job.setSortComparatorClass(CompositeKeyComparator.class);
        // Group on the reduce side so that keys with the same symbol share one group.
        job.setGroupingComparatorClass(CompositeGroupingComparator.class);
        // Partition by symbol.
        job.setPartitionerClass(NaturalKeyPartitioner.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);

    }

}

（7）工具类：将2012-12-09转为20121209这种形式：

package com.book.test1;

public class UtilsCmain {

    /**
     * Converts a {@code year-month-day} string into the number
     * {@code yyyyMMdd}, e.g. {@code "2012-12-09"} becomes {@code 20121209}.
     *
     * <p>The result is computed numerically rather than by concatenating the
     * parts, so inputs without zero padding still order correctly:
     * {@code "2013-1-5"} becomes {@code 20130105}, not {@code 201315}.
     * Components after the third are ignored.
     *
     * @param vaule date text in {@code year-month-day} form
     * @return the date encoded as {@code year * 10000 + month * 100 + day}
     * @throws IllegalArgumentException if fewer than three components are
     *         present or month/day are out of range
     * @throws NumberFormatException if a component is not a number
     */
    public static long DataTranform(String vaule) {
        String[] parts = vaule.trim().split("-");
        if (parts.length < 3) {
            throw new IllegalArgumentException(
                    "Expected a date like yyyy-MM-dd but got: " + vaule);
        }

        long year = Long.parseLong(parts[0].trim());
        long month = Long.parseLong(parts[1].trim());
        long day = Long.parseLong(parts[2].trim());

        // Out-of-range values would spill into the neighbouring digits of the
        // encoded number and corrupt the ordering.
        if (month < 1 || month > 12 || day < 1 || day > 31) {
            throw new IllegalArgumentException("Month or day out of range in: " + vaule);
        }

        return year * 10000L + month * 100L + day;
    }

}

运行结果：

相关阅读:
从零开始学android开发-通过WebService获取今日天气情况
 android常见错误-E/AndroidRuntime(13678): java.lang.NoClassDefFoundError:
java 使用相对路径读取文件
 冒泡排序
 快速排序
 为什么使用抽象类？有什么好处？
为什么用抽象类，接口
 String.valueOf()
Python 资源
 文本相似度-BM25算法
原文地址：https://www.cnblogs.com/shenxiaoquan/p/8671702.html