// Beginner example written in Scala: connecting Spark Streaming to Kafka.
import kafka.serializer.StringDecoder
import org.apache.log4j.{Level, Logger}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils

// Silence Spark's verbose INFO/WARN logging so the batch output below is readable.
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)

// sc: SparkContext — provided automatically by spark-shell.
// Micro-batch interval of 2 seconds.
val ssc = new StreamingContext(sc, Seconds(2))

// Comma-separated list of Kafka topics to subscribe to.
val kafkaTopic = "test"

// ZooKeeper quorum. NOTE(review): the direct (receiver-less) API reads offsets
// from the brokers, not ZooKeeper; this key is kept only for reference.
val zkQuorum = "localhost:2181"

// Kafka connection settings. Keys must be valid consumer-config names:
// "bootstrap.servers" (was misspelled "boorstrap.server") and "group.id".
val kafkaParams = Map(
  "bootstrap.servers" -> "0.0.0.0:9092",
  "zookeeper.connect" -> zkQuorum,
  "group.id" -> "spark-streaming-test"
)

val topics: Set[String] = kafkaTopic.split(",").map(_.trim).toSet

// Direct stream: one RDD partition per Kafka partition; each record is a
// (key, value) pair decoded with StringDecoder.
val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)

// For every micro-batch, report how many records arrived and print the values.
stream.foreachRDD { rdd =>
  if (!rdd.isEmpty()) {
    val count = rdd.count()
    println(s"Captured $count record(s) in this batch")
    rdd.map(_._2).collect().foreach(println) // collect() is fine only for small demo batches
  } else {
    println("No data captured")
  }
}

ssc.start()
ssc.awaitTermination() // blocks until the streaming context is stopped externally

// Runs only after termination. Scala's Boolean literals are lowercase (`true`, not `True`).
ssc.stop(stopSparkContext = true, stopGracefully = true)