• Spark Streaming integration with Kafka


    Versions: Spark 2.2.0; Kafka 0.10.0.0
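    The example below assumes the spark-streaming-kafka-0-10 artifacts are on the classpath. A minimal build.sbt sketch (versions follow the note above; adjust the Spark and Scala versions to your environment):

        libraryDependencies ++= Seq(
          "org.apache.spark" %% "spark-core"                 % "2.2.0",
          "org.apache.spark" %% "spark-sql"                  % "2.2.0",
          "org.apache.spark" %% "spark-hive"                 % "2.2.0", // for enableHiveSupport()
          "org.apache.spark" %% "spark-streaming"            % "2.2.0",
          "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.2.0"
        )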

    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.log4j.{Level, Logger}
    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.streaming.dstream.InputDStream
    import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object StreamingDemo {
      def main(args: Array[String]): Unit = {
    
        Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
        Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.WARN)
        Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.WARN)
    
        // pass the warehouse location as-is; wrapping an HDFS URI in java.io.File
        // and calling getAbsolutePath would turn it into a bogus local path
        val warehouseLocation = "hdfs://user/hive/warehouse"
    
        // NOTE: all three entries point at the same broker; in a real cluster,
        // list each broker's host:port once
        val bootstrapServers = "192.168.156.111:9092,192.168.156.111:9092,192.168.156.111:9092"
    
        val spark: SparkSession = SparkSession
          .builder()
          .appName("Spark SQL To Hive")
          .master("local[4]")
          .config("spark.sql.warehouse.dir", warehouseLocation)
          // streaming settings must land in the SparkConf before the contexts
          // exist, so set them on the builder instead of spark.conf.set(),
          // which only affects the session's runtime config
          .config("spark.streaming.concurrentJobs", "10")
          .config("spark.streaming.kafka.maxRetries", "50")
          .config("spark.streaming.stopGracefullyOnShutdown", "true")
          .config("spark.streaming.backpressure.enabled", "true")
          .config("spark.streaming.backpressure.initialRate", "5000")
          .config("spark.streaming.kafka.maxRatePerPartition", "3000")
          .enableHiveSupport()
          .getOrCreate()
    
        val sc: SparkContext = spark.sparkContext
        val ssc: StreamingContext = new StreamingContext(sc, Seconds(5)) // 5-second micro-batches
    
        // Kafka consumer parameters
        val kafkaParams = Map[String, Object](
          "auto.offset.reset" -> "latest", // start from the newest offsets if the group has none committed
          "value.deserializer" -> classOf[StringDeserializer],
          "key.deserializer" -> classOf[StringDeserializer],
          "bootstrap.servers" -> bootstrapServers,
          "group.id" -> "test-consumer-group",
          "enable.auto.commit" -> (true: java.lang.Boolean) // auto-commit offsets; set to false to manage them yourself (see the sketch below)
        )
    
        val topics = Array("test")

        val stream: InputDStream[ConsumerRecord[String, String]] =
          KafkaUtils.createDirectStream[String, String](
            ssc,
            LocationStrategies.PreferConsistent,
            ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
          )
    
    
        stream.foreachRDD { rdd =>
          // pull out the message values; cache() only pays off when an RDD is
          // reused, and this one is consumed exactly once
          val values: RDD[String] = rdd.map(_.value())
          values.foreach(println) // foreach runs on the executors, so in cluster mode the output appears in executor logs
        }
    
        ssc.start()            // start receiving and processing
        ssc.awaitTermination() // block until the job is stopped or fails
    
    
      }
    }
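
    Because enable.auto.commit is true above, the Kafka client commits offsets on its own timer, so a crash between a commit and the actual processing can skip or replay records. A minimal sketch of manual commits using this integration's HasOffsetRanges/CanCommitOffsets API (assumes enable.auto.commit is flipped to false in kafkaParams; stream is the same DStream as above):

        import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

        stream.foreachRDD { rdd =>
          // offset ranges are only visible on the original Kafka RDD,
          // so capture them before any transformation
          val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

          rdd.map(_.value()).foreach(println)

          // commit asynchronously once this batch's work has finished
          stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
        }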
    

      

  • Source: https://www.cnblogs.com/zbw1112/p/11943474.html