• 2021寒假(17)


    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    import scala.collection.mutable
    
    object Spark05_Bc {

        /**
         * Demonstrates replacing an RDD `join` with a broadcast variable.
         *
         * Joining two RDDs shuffles both datasets across the cluster and can
         * blow up the row count, hurting performance. When one side is small,
         * the idiomatic alternative is to broadcast it once per executor and
         * look values up locally inside a `map`.
         */
        def main(args: Array[String]): Unit = {

            val sparConf = new SparkConf().setMaster("local").setAppName("Acc")
            val sc = new SparkContext(sparConf)

            // The "large" side stays an RDD.
            val rdd1 = sc.makeRDD(List(
                ("a", 1),("b", 2),("c", 3)
            ))
    //        val rdd2 = sc.makeRDD(List(
    //            ("a", 4),("b", 5),("c", 6)
    //        ))
            // The "small" side is kept as a plain in-memory map.
            val map = mutable.Map(("a", 4),("b", 5),("c", 6))

            // A join grows the data geometrically and forces a shuffle,
            // so it is not recommended here:
            //val joinRDD: RDD[(String, (Int, Int))] = rdd1.join(rdd2)
            //joinRDD.collect().foreach(println)

            // Broadcast the small map so it is shipped once per executor
            // instead of being serialized into the closure once per task.
            val bcMap = sc.broadcast(map)

            // (a, 1),    (b, 2),    (c, 3)
            // (a, (1,4)),(b, (2,5)),(c, (3,6))
            rdd1.map {
                case (w, c) => {
                    // Read through the broadcast handle; default to 0 when
                    // the key is absent from the small side.
                    val l: Int = bcMap.value.getOrElse(w, 0)
                    (w, (c, l))
                }
            }.collect().foreach(println)

            sc.stop()

        }
    }
    
  • 相关阅读:
    1组Alpha冲刺总结
    1组Beta冲刺4/5
    1组Beta冲刺5/5
    1组Alpha冲刺4/6
    1组Alpha冲刺总结
    1组Beta冲刺2/5
    1组Beta冲刺3/5
    1组Beta冲刺2/5
    1组Alpha冲刺4/6
    1组Alpha冲刺5/6
  • 原文地址:https://www.cnblogs.com/ywqtro/p/14290889.html
Copyright © 2020-2023  润新知