• Spark Action Operations in Practice



    package cn.rzlee.spark.core
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    object ActionOperation {
      def main(args: Array[String]): Unit = {
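        // Uncomment one action at a time to run it; each method builds its own SparkContext.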
        //reduce()
        //collect()
        //count()
        //take()
        //saveAsTextFile()
        countByKey()
      }
    
    
      def reduce(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList,1)
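        // reduce is an action: it aggregates the elements pairwise on the cluster and returns the final value (here 55) to the driver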
        val sum: Int = numbersRdd.reduce(_+_)
        println(sum)
      }
    
    
      def collect(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList,1)
    
        val doubleNumbers: RDD[Int] = numbersRdd.map(num => num * 2)
        // collect() is an action that pulls the whole RDD back to the driver as a local array;
        // iterating the RDD directly would run foreach on the executors instead of the driver
        val doubled: Array[Int] = doubleNumbers.collect()
        for (num <- doubled) {
          println(num)
        }
      }
    
      def count(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList,1)
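        // count is an action: it returns the total number of elements (here 10) to the driver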
        val count: Long = numbersRdd.count()
        println(count)
      }
    
    
    
    
      def take(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList,1)
    
        // take(n) is an action that returns the first n elements to the driver as a local array
        val top3Numbers = numbersRdd.take(3)
        for (num <- top3Numbers){
          println(num)
        }
      }
    
      def saveAsTextFile(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val numbersList = Array(1,2,3,4,5,6,7,8,9,10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList,1)
        // backslashes must be escaped in a Scala string literal; the output directory must not already exist,
        // and Spark writes one part file per partition into it
        numbersRdd.saveAsTextFile("C:\\Users\\txdyl\\Desktop\\log\\out\\saveAsTest")
      }
    
      def countByKey(): Unit ={
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)
    
        val studentList = Array(("class1", "tom"), ("class2", "leo"), ("class1", "jeo"), ("class2", "jime"))
        val students: RDD[(String, String)] = sc.parallelize(studentList, 1)
        val studentsCounts: collection.Map[String, Long] = students.countByKey()
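        // countByKey is an action: it returns a local Map to the driver, here Map(class1 -> 2, class2 -> 2)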
        println(studentsCounts)
      }
    
      // foreach executes on the remote executors instead of pulling the data back to the driver
      // and processing it record by record, so it performs much better than collect; see the sketch below.
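      // A minimal foreach sketch (this method is an illustrative addition, not part of the original post).
      // In local mode the println output appears in the driver console; on a real cluster it would
      // appear in each executor's stdout log instead.
      def foreach(): Unit = {
        val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
        val sc = new SparkContext(conf)

        val numbersList = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        val numbersRdd: RDD[Int] = sc.parallelize(numbersList, 1)
        numbersRdd.foreach(num => println(num))
      }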
    
    }
• Original article: https://www.cnblogs.com/RzCong/p/9893573.html