Version notes: Spark 2.2.0; Kafka 0.10.0.0
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingDemo {

  def main(args: Array[String]): Unit = {
    // Quiet the noisy framework loggers so application output stays readable
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.WARN)
    Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.WARN)

    // Use the HDFS URI directly; wrapping it in java.io.File would resolve it
    // against the local filesystem and produce a bogus warehouse path
    val warehouseLocation = "hdfs://user/hive/warehouse"
    val bootstrapServers = "192.168.156.111:9092,192.168.156.111:9092,192.168.156.111:9092"

    // Streaming settings must be in the SparkConf before the contexts are
    // created; setting them later via spark.conf.set() has no effect
    val spark: SparkSession = SparkSession
      .builder()
      .appName("Spark SQL To Hive")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .config("spark.streaming.concurrentJobs", "10")
      .config("spark.streaming.kafka.maxRetries", "50")
      .config("spark.streaming.stopGracefullyOnShutdown", "true")
      .config("spark.streaming.backpressure.enabled", "true")
      .config("spark.streaming.backpressure.initialRate", "5000")
      .config("spark.streaming.kafka.maxRatePerPartition", "3000")
      .master("local[4]")
      .enableHiveSupport()
      .getOrCreate()

    @transient val sc: SparkContext = spark.sparkContext
    // 5-second micro-batch interval
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))

    // Kafka consumer parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bootstrapServers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-consumer-group",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )

    val topics = Array("test")
    val stream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )

    // For each micro-batch, pull out the message value and print it
    stream.foreachRDD(rdd => {
      val cache_rdd: RDD[String] = rdd.map(_.value()).cache()
      cache_rdd.foreach(println)
      // Release the cached batch once it has been processed
      cache_rdd.unpersist()
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
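With a direct stream, "enable.auto.commit" -> true lets the Kafka client commit offsets on its own schedule, which can mark records as consumed before the batch containing them has actually been processed. A common alternative is to disable auto-commit and commit each batch's offsets yourself after processing succeeds. The snippet below is a minimal sketch of that pattern, not part of the demo above; it assumes the same stream as above with "enable.auto.commit" set to false, and the foreachRDD body is only a placeholder.

// Sketch: manual offset commits, assuming "enable.auto.commit" -> (false: java.lang.Boolean)
// in kafkaParams and the same `stream` defined above.
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

stream.foreachRDD { rdd =>
  // The offset ranges for this batch are only available on the original
  // Kafka RDD, so read them before applying any transformation
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  rdd.map(_.value()).foreach(println) // placeholder for real processing

  // Commit the consumed offsets back to Kafka once processing has succeeded
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}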