• spark streaming整合kafka


    版本说明:Spark 2.2.0;Kafka 0.10.0.0

    object StreamingDemo {
      def main(args: Array[String]): Unit = {

        // Reduce log noise from Spark, Jetty, and the Kafka consumer client.
        Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
        Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.WARN)
        Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.WARN)

        // Use the HDFS URI as-is. The original wrapped it in
        // `new File(...).getAbsolutePath`, which resolves the URI against the
        // LOCAL filesystem and yields a broken warehouse path such as
        // "/current/dir/hdfs:/user/hive/warehouse".
        val warehouseLocation = "hdfs://user/hive/warehouse"

        // NOTE(review): the original listed the very same broker three times;
        // list each DISTINCT broker of the cluster here, comma-separated.
        val bootstrapServers = "192.168.156.111:9092"

        val spark: SparkSession = SparkSession
          .builder()
          .appName("Spark SQL To Hive")
          .config("spark.sql.warehouse.dir", warehouseLocation)
          // spark.streaming.* options must be set BEFORE the SparkContext /
          // StreamingContext is created. The original applied them via
          // spark.conf.set after getOrCreate(), where they have no effect
          // on the streaming engine.
          .config("spark.streaming.concurrentJobs", "10")
          .config("spark.streaming.kafka.maxRetries", "50")
          .config("spark.streaming.stopGracefullyOnShutdown", "true")
          .config("spark.streaming.backpressure.enabled", "true")
          .config("spark.streaming.backpressure.initialRate", "5000")
          .config("spark.streaming.kafka.maxRatePerPartition", "3000")
          .master("local[4]")
          .enableHiveSupport()
          .getOrCreate()

        // (@transient on a local val is meaningless and was dropped — the
        // annotation only matters for fields of serialized objects.)
        val sc: SparkContext = spark.sparkContext
        // 5-second micro-batches.
        val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))

        // Kafka consumer configuration.
        // NOTE(review): enable.auto.commit=true with a direct stream commits
        // offsets on a timer, independent of whether the batch was processed;
        // for at-least-once delivery set it to false and commit offsets
        // manually (stream.asInstanceOf[CanCommitOffsets].commitAsync).
        val kafkaParams = Map[String, Object](
          "auto.offset.reset" -> "latest",
          "value.deserializer" -> classOf[StringDeserializer],
          "key.deserializer" -> classOf[StringDeserializer],
          "bootstrap.servers" -> bootstrapServers,
          "group.id" -> "test-consumer-group",
          "enable.auto.commit" -> (true: java.lang.Boolean)
        )

        val topics = Array("test")

        // `val` with direct assignment replaces the original
        // `var stream = null` + later reassignment.
        val stream: InputDStream[ConsumerRecord[String, String]] =
          KafkaUtils.createDirectStream[String, String](
            ssc,
            LocationStrategies.PreferConsistent,
            ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
          )

        // Print every record value of each micro-batch. The original called
        // .cache() before a SINGLE action, which only wastes executor memory
        // (a cache pays off from the second action onward), so it was removed.
        stream.foreachRDD { rdd =>
          rdd.map(_.value()).foreach(println)
        }

        ssc.start()
        ssc.awaitTermination()
      }
    }
    

      

  • 相关阅读:
    数据分析面试题
    二、初始化superset
    一、下载安装superset
    leaflet如何加载10万数据
    leaflet中如何优雅的解决百度、高德地图的偏移问题
    oracle 获取数据库时间
    dev中gridcontrol中改变符合某一条件的行的背景色
    dev中动态初始化菜单栏
    oracle向数据库中插入时间数据
    Silverlight中全屏处理
  • 原文地址:https://www.cnblogs.com/zbw1112/p/11943474.html
Copyright © 2020-2023  润新知