• sparkSQL中的example学习(3)


    UserDefinedTypedAggregation.scala(用户可自定义类型)

    
    import org.apache.spark.sql.expressions.Aggregator
    import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
    
    object UserDefinedTypedAggregation {
    
     case class Employee(name: String, salary: Long)
     case class Average(var sum: Long, var count: Long)
    
    
     object MyAverage extends Aggregator[Employee, Average, Double] {
    
      //A zero value for this aggregation. Should satisfy the property that any b + zero = b
      def zero: Average = Average(0L, 0L)
    
      //Commine two values to produce a new value. For performance, the function may modify `buffer`
      //and return it instead of constructiong a new object
      def reduce(buffer: Average, employee: Employee): Average = {
       buffer.sum += employee.salary
       buffer.count += 1
       buffer
      }
    
      //Merge two intermediate values
      def merge(b1: Average, b2: Average): Average = {
       b1.sum += b2.sum
       b1.count += b2.count
       b1
      }
    
      //Transform the ouput of the reduction
      def finish(reducetion: Average): Double = reducetion.sum.toDouble / reducetion.count
    
      //Specifies the Encoder for the intermediate value type
      def bufferEncoder: Encoder[Average] = Encoders.product
    
      //Specifies the Encoder for the final output value type
      def outputEncoder: Encoder[Double] = Encoders.scalaDouble
     }
    
    // $example off: type_custom_aggregation$
    
    
     def main(args: Array[String]): Unit = {
      val spark = SparkSession
        .builder()
        .appName("Spark SQL user-defined Datasets aggregation example")
        .master("local")
        .getOrCreate()
    
      import spark.implicits._
    
      val ds = spark.read.json("/Users/hadoop/app/spark/examples/src/main/resources/employees.json").as[Employee]
      ds.show()
    
      val averageSalary = MyAverage.toColumn.name("average_salary")
      val result = ds.select(averageSalary)
      result.show()
    
    
    
      spark.stop()
     }
    
    }
    
    

    屏幕快照 2019-05-14 03.57.12

  • 相关阅读:
    js dom
    js Number string
    jq ajax数据交互
    js date 和 math
    js中英文网页切换
    日常使用
    php求和
    empty()
    时间戳、日期相互转换
    数组转字符串之间相互转换
  • 原文地址:https://www.cnblogs.com/suixingc/p/sparksql-zhong-deexample-xue-xi-3.html
Copyright © 2020-2023  润新知