• Spark Java API 实现二次排序


    package com.spark.sort;
    
    import java.io.Serializable;
    
    import scala.math.Ordered;
    
    /**
     * Composite key for a secondary sort: keys are ordered by {@code first}
     * (natural {@link String} order) and, when {@code first} is equal, by
     * {@code second} (ascending numeric order).
     *
     * <p>The ordering is consistent with {@link #equals(Object)}: {@code compare}
     * returns 0 exactly when both components are equal. A {@code null}
     * {@code first} is accepted by {@code equals}/{@code hashCode}, so the ordering
     * accepts it too and sorts it before every non-null value.
     */
    public class SecondSortKey implements Serializable, Ordered<SecondSortKey> {
    	/**
    	 * serialVersionUID
    	 */
    	private static final long serialVersionUID = -2749925310062789494L;
    	private String first;
    	private long second;

    	/**
    	 * @param first  primary sort component
    	 * @param second secondary sort component, used to break ties on {@code first}
    	 */
    	public SecondSortKey(String first, long second) {
    		super();
    		this.first = first;
    		this.second = second;
    	}

    	/** @return the primary sort component */
    	public String getFirst() {
    		return first;
    	}

    	/** @param first the new primary sort component */
    	public void setFirst(String first) {
    		this.first = first;
    	}

    	/** @return the secondary sort component */
    	public long getSecond() {
    		return second;
    	}

    	/** @param second the new secondary sort component */
    	public void setSecond(long second) {
    		this.second = second;
    	}

    	@Override
    	public int hashCode() {
    		final int prime = 31;
    		int result = 1;
    		result = prime * result + ((first == null) ? 0 : first.hashCode());
    		// Fold the high and low 32 bits of the long into one int.
    		result = prime * result + (int) (second ^ (second >>> 32));
    		return result;
    	}

    	@Override
    	public boolean equals(Object obj) {
    		if (this == obj) {
    			return true;
    		}
    		if (obj == null || getClass() != obj.getClass()) {
    			return false;
    		}
    		SecondSortKey other = (SecondSortKey) obj;
    		boolean sameFirst = (first == null) ? other.first == null : first.equals(other.first);
    		return sameFirst && second == other.second;
    	}

    	/** @return {@code true} if this key sorts strictly after {@code that} */
    	@Override
    	public boolean $greater(SecondSortKey that) {
    		return compare(that) > 0;
    	}

    	/** @return {@code true} if this key sorts after, or equal to, {@code that} */
    	@Override
    	public boolean $greater$eq(SecondSortKey that) {
    		return compare(that) >= 0;
    	}

    	/** @return {@code true} if this key sorts strictly before {@code that} */
    	@Override
    	public boolean $less(SecondSortKey that) {
    		return compare(that) < 0;
    	}

    	/** @return {@code true} if this key sorts before, or equal to, {@code that} */
    	@Override
    	public boolean $less$eq(SecondSortKey that) {
    		return compare(that) <= 0;
    	}

    	/**
    	 * Compares by {@code first}, then by {@code second}.
    	 *
    	 * @param that the key to compare against
    	 * @return a negative value, zero, or a positive value when this key sorts
    	 *         before, equal to, or after {@code that}
    	 */
    	@Override
    	public int compare(SecondSortKey that) {
    		int byFirst = compareFirst(this.first, that.getFirst());
    		if (byFirst != 0) {
    			return byFirst;
    		}
    		// Long.compare instead of (int) (a - b): the subtraction can overflow and
    		// the narrowing cast can drop the sign or collapse a non-zero gap to 0.
    		return Long.compare(this.second, that.getSecond());
    	}

    	/** Same ordering as {@link #compare(SecondSortKey)}. */
    	@Override
    	public int compareTo(SecondSortKey that) {
    		return compare(that);
    	}

    	/** Null-safe string comparison; {@code null} sorts before any non-null value. */
    	private static int compareFirst(String left, String right) {
    		if (left == right) {
    			return 0;
    		}
    		if (left == null) {
    			return -1;
    		}
    		if (right == null) {
    			return 1;
    		}
    		return left.compareTo(right);
    	}

    }
    

      

    package com.spark.sort;

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.api.java.function.PairFunction;

    import scala.Tuple2;

    12 public class SecondSort {
    13 
    14     public static void main(String[] args) {
    15         SparkConf sparkConf = new SparkConf().setAppName("secondsort").setMaster("local");
    16         JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    17         JavaRDD<String> textFileRDD = jsc.textFile("D:\test\input\sort");
    18         JavaPairRDD<SecondSortKey,String> pairRDD = textFileRDD.mapToPair(new PairFunction<String, SecondSortKey, String>() {
    19             @Override
    20             public Tuple2<SecondSortKey, String> call(String t) throws Exception {
    21                 String[] split = t.split("	");
    22                 String first = split[0];
    23                 Long second = Long.valueOf(split[1]);
    24                 SecondSortKey ssk = new SecondSortKey(first, second);
    25                 return new Tuple2<SecondSortKey, String>(ssk, t);
    26             }
    27         });
    28         
    29         //排序
    30         JavaPairRDD<SecondSortKey, String> sortByKeyRDD =pairRDD.sortByKey();
    31         
    32         //过滤自定义的key
    33         JavaRDD<String> mapRDD = sortByKeyRDD.map(new Function<Tuple2<SecondSortKey,String>, String>() {
    34 
    35             @Override
    36             public String call(Tuple2<SecondSortKey, String> v1) throws Exception {
    37                 
    38                 return v1._2;
    39             }
    40         });
    41         
    42         mapRDD.saveAsTextFile("D:\test\output\sort");
    43         
    44         jsc.close();
    45     }
    46     
    47     
    48 }
    

      

    源数据:

    a 12
    a 2
    b 26
    c 85
    ab 32
    ab 23
    ac 12
    b 85
    a 36
    b 69
    c 25

    排序之后:

    a 2
    a 12
    a 36
    ab 23
    ab 32
    ac 12
    b 26
    b 69
    b 85
    c 25
    c 85

  • 相关阅读:
    c++实现的一个链栈
    VS2005_XP DDK_DS3.2安装说明
    驱动开发遇到的一些问题
    fatal error LNK1000解决方法
    C++文件依存关系---提高编译速度
    架构的一些心得
    CPU与GPU的一点理解
    看"C++动态链接库编程深入浅出"的一些笔录
    VC2008下提示找不到MSVCP90D.dll的解决办法
    DLL搜索顺序
  • 原文地址:https://www.cnblogs.com/sunrise88/p/7251483.html
Copyright © 2020-2023  润新知