• Several ways to implement custom sorting with Scala (on Spark RDDs)


    # Method 1: sort by a custom key class that extends Ordered

    package day05
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object SortTest1 {
    
      def main(args: Array[String]): Unit = {
    
        val conf: SparkConf = new SparkConf().setAppName("SortTest1").setMaster("local[4]")
        val sc: SparkContext = new SparkContext(conf)
    
        val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")
    
        // Build the RDD from a local collection with parallelize
        val contentRDD: RDD[String] = sc.parallelize(content)
        // Map each line into a (name, age, faceValue) tuple
        val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
          val fields: Array[String] = line.split(" ")
          val name: String = fields(0)
          val age: Int = fields(1).toInt
          val faceValue: Int = fields(2).toInt
          (name, age, faceValue)
        })
        // Sort by the custom key class
        val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => new MethodForSort(tp._3, tp._2))
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    
    }
    
    // Custom sort key: extend Ordered so instances can be compared, and Serializable so Spark can ship them to executors
    class MethodForSort(val fv: Int, val age: Int) extends Ordered[MethodForSort] with Serializable {
      override def compare(that: MethodForSort): Int = {
        // If fv is equal, compare by age (ascending); otherwise sort by fv descending
        if (this.fv == that.fv) {
          this.age - that.age
        } else {
          -(this.fv - that.fv)
        }
      }
    }
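
    Because MethodForSort extends Ordered (which extends Comparable), the standard library can derive the Ordering that sortBy needs, so the same key class also drives sorting on a plain Scala collection. A minimal local sketch, no Spark required; the object name LocalOrderedKeySketch is made up and assumes it sits in the same file as MethodForSort:

    object LocalOrderedKeySketch {
      def main(args: Array[String]): Unit = {
        val data = Seq(("laoduan", 29, 88), ("laoyang", 30, 100), ("laozhang", 24, 70), ("laoli", 34, 88))
        // Same rule as above: fv descending, ties broken by age ascending
        val sorted = data.sortBy(tp => new MethodForSort(tp._3, tp._2))
        println(sorted)
      }
    }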
    

    # Method 2: store the Ordered class itself in the RDD and sort it directly

    package day05
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object SortTest2 {
    
      def main(args: Array[String]): Unit = {
    
        val conf: SparkConf = new SparkConf().setAppName("SortTest2").setMaster("local[4]")
        val sc: SparkContext = new SparkContext(conf)
    
        val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")
    
        // Build the RDD from a local collection with parallelize
        val contentRDD: RDD[String] = sc.parallelize(content)
        // Map each line into a MethodForSort2 instance
        val mapRDD: RDD[MethodForSort2] = contentRDD.map(line => {
          val fields: Array[String] = line.split(" ")
          val name: String = fields(0)
          val age: Int = fields(1).toInt
          val faceValue: Int = fields(2).toInt
          new MethodForSort2(name, age, faceValue)
        })
    
        // Sort the MethodForSort2 elements stored in the RDD by their own ordering
        val sorted: RDD[MethodForSort2] = mapRDD.sortBy(tp => tp)
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    
    }
    
    // Custom sort: extend Ordered so instances can be compared, and Serializable so Spark can ship them to executors
    class MethodForSort2(val name: String, val age: Int, val fv: Int) extends Ordered[MethodForSort2] with Serializable {
      override def compare(that: MethodForSort2): Int = {
        // If fv is equal, compare by age (ascending); otherwise sort by fv descending
        if (this.fv == that.fv) {
          this.age - that.age
        } else {
          -(this.fv - that.fv)
        }
      }
      override def toString: String = s"name:$name,age:$age,faceValue:$fv"
    }
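
    Because the RDD elements are themselves Ordered, sortBy(tp => tp) simply sorts by the class's own compare method. Locally the equivalent is just .sorted; a small sketch with a made-up object name, assuming it sits next to MethodForSort2:

    object LocalOrderedElementsSketch {
      def main(args: Array[String]): Unit = {
        val people = Seq(
          new MethodForSort2("laoduan", 29, 88),
          new MethodForSort2("laoyang", 30, 100),
          new MethodForSort2("laozhang", 24, 70),
          new MethodForSort2("laoli", 34, 88)
        )
        // Uses MethodForSort2.compare: fv descending, age ascending on ties; println uses the overridden toString
        people.sorted.foreach(println)
      }
    }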
    

    # Method 3: use a case class as the sort key

    package day05
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object SortTest3 {
    
      def main(args: Array[String]): Unit = {
    
        val conf: SparkConf = new SparkConf().setAppName("SortTest3").setMaster("local[4]")
        val sc: SparkContext = new SparkContext(conf)
    
        val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")
    
        // Build the RDD from a local collection with parallelize
        val contentRDD: RDD[String] = sc.parallelize(content)
        // Map each line into a (name, age, faceValue) tuple
        val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
          val fields: Array[String] = line.split(" ")
          val name: String = fields(0)
          val age: Int = fields(1).toInt
          val faceValue: Int = fields(2).toInt
          (name, age, faceValue)
        })
        // Sort by the case-class key (no new needed)
        val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => MethodForSort3(tp._3, tp._2))
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    
    }
    
    // Custom sort: extend Ordered so instances can be compared, and Serializable so Spark can ship them to executors
    // A case class can have many instances (unlike a singleton object) and is constructed without new
    case class MethodForSort3(fv: Int, age: Int) extends Ordered[MethodForSort3] with Serializable {
      override def compare(that: MethodForSort3): Int = {
        // If fv is equal, compare by age (ascending); otherwise sort by fv descending
        if (this.fv == that.fv) {
          this.age - that.age
        } else {
          -(this.fv - that.fv)
        }
      }
    }
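
    A quick way to sanity-check the case-class key outside Spark: a case class needs no new, and Ordered supplies the comparison operators. A small sketch with a made-up object name, assuming it sits next to MethodForSort3:

    object CaseClassKeySketch {
      def main(args: Array[String]): Unit = {
        // Higher fv sorts first
        println(MethodForSort3(100, 30) < MethodForSort3(88, 29))  // true
        // Equal fv: the younger age sorts first
        println(MethodForSort3(88, 29) < MethodForSort3(88, 34))   // true
      }
    }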
    

    # Method 4: use the tuple comparison rules as the sort key

    package day05
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object SortTest4 {
    
      def main(args: Array[String]): Unit = {
    
        val conf: SparkConf = new SparkConf().setAppName("SortTest4").setMaster("local[4]")
        val sc: SparkContext = new SparkContext(conf)
    
        val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")
    
        // Build the RDD from a local collection with parallelize
        val contentRDD: RDD[String] = sc.parallelize(content)
        // Map each line into a (name, age, faceValue) tuple
        val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
          val fields: Array[String] = line.split(" ")
          val name: String = fields(0)
          val age: Int = fields(1).toInt
          val faceValue: Int = fields(2).toInt
          (name, age, faceValue)
        })
        // Exploit the tuple comparison rule: compare the first element first, and only if it is equal compare the second
        val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => (-tp._3, tp._2))
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    
    }
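
    The tuple trick above is just the lexicographic Ordering that Scala already defines for tuples of comparable elements; negating fv turns "larger first" into the default ascending order. The same one-liner works on a local collection (a sketch, no Spark; names are illustrative):

    object LocalTupleKeySketch {
      def main(args: Array[String]): Unit = {
        val people = Seq(("laoduan", 29, 88), ("laoyang", 30, 100), ("laozhang", 24, 70), ("laoli", 34, 88))
        // Key (-fv, age): fv descending, then age ascending on ties
        println(people.sortBy(tp => (-tp._3, tp._2)))
      }
    }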
    

    # Method 5: define an implicit Ordering with Ordering[...].on

    package day05
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object SortTest5 {
    
      def main(args: Array[String]): Unit = {
    
        val conf: SparkConf = new SparkConf().setAppName("SortTest5").setMaster("local[4]")
        val sc: SparkContext = new SparkContext(conf)
    
        val content = Array("laoduan 29 88","laoyang 30 100","laozhang 24 70","laoli 34 88")
    
        // Build the RDD from a local collection with parallelize
        val contentRDD: RDD[String] = sc.parallelize(content)
        // Map each line into a (name, age, faceValue) tuple
        val mapRDD: RDD[(String, Int, Int)] = contentRDD.map(line => {
          val fields: Array[String] = line.split(" ")
          val name: String = fields(0)
          val age: Int = fields(1).toInt
          val faceValue: Int = fields(2).toInt
          (name, age, faceValue)
        })
        // Exploit the tuple comparison rule: compare the first element first, and only if it is equal compare the second
        // Ordering[(Int, Int)]         -- the key type on which the comparison is ultimately done
        // on[(String, Int, Int)]       -- the element type before conversion
        // (t => (-t._3, t._2))         -- how an element is turned into the comparison key
        implicit val rules: Ordering[(String, Int, Int)] = Ordering[(Int, Int)].on[(String, Int, Int)](t => (-t._3, t._2))
        val sorted: RDD[(String, Int, Int)] = mapRDD.sortBy(tp => tp)
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    
    }
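
    Ordering[...].on[...] is plain Scala rather than a Spark-specific API: it adapts an Ordering of the key type into an Ordering of the element type. The same rule can be passed to sorted on a local collection (a sketch; names are illustrative):

    object LocalOrderingOnSketch {
      def main(args: Array[String]): Unit = {
        val people = Seq(("laoduan", 29, 88), ("laoyang", 30, 100), ("laozhang", 24, 70), ("laoli", 34, 88))
        // Compare (name, age, fv) triples by the derived key (-fv, age)
        val rules: Ordering[(String, Int, Int)] = Ordering[(Int, Int)].on[(String, Int, Int)](t => (-t._3, t._2))
        println(people.sorted(rules))
      }
    }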
  • Original article: https://www.cnblogs.com/crazyforever/p/9898676.html