先上代码:
package demo

import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by lou on 2018/04/25.
 *
 * Minimal Spark Streaming job: opens a direct (receiver-less) Kafka stream on
 * the topic `topic_test` and prints every record of every micro-batch to
 * standard output.
 */
object SimpleKafkaStream {

  /**
   * Entry point. Builds the Spark and streaming contexts, wires the Kafka
   * direct stream to a printing sink, then starts the streaming job and blocks
   * until it terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName(SimpleKafkaStream.getClass.getName)
      .setMaster("local")
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(sparkConf)

    // Micro-batch interval, in seconds.
    val interval = 5
    val topics = Set("topic_test")
    // NOTE(review): "serializer.class" looks like a producer-side setting and is
    // presumably not needed by the consumer — confirm before removing it.
    val kafkaParams = Map[String, String](
      "metadata.broker.list" -> "192.168.10.1:9092",
      "serializer.class" -> "kafka.serializer.StringEncoder"
    )

    val ssc = new StreamingContext(sc, Seconds(interval))
    val kafkaStream =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc,
        kafkaParams,
        topics
      )

    // Print each record of each partition of each batch.
    kafkaStream.foreachRDD { rdd =>
      rdd.foreachPartition { records =>
        records.foreach(message => println(s"message:$message"))
      }
    }

    // Output operations above are only registered; nothing executes until the
    // context is started. Block afterwards so the JVM stays alive while batches run.
    ssc.start()
    ssc.awaitTermination()
  }
}
pom.xml配置:
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming-kafka_2.10</artifactId>
  <version>1.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.kafka</groupId>
  <artifactId>kafka_2.10</artifactId>
  <version>0.9.0.1</version>
</dependency>
异常日志:
17/09/20 22:35:10 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.NoSuchMethodException: scala.runtime.Nothing$.<init>(kafka.utils.VerifiableProperties) at java.lang.Class.getConstructor0(Class.java:3074) at java.lang.Class.getConstructor(Class.java:1817) at org.apache.spark.streaming.kafka.KafkaRDD$KafkaRDDIterator.<init>(KafkaRDD.scala:150) at org.apache.spark.streaming.kafka.KafkaRDD.compute(KafkaRDD.scala:136) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319) at org.apache.spark.rdd.RDD.iterator(RDD.scala:283) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) at org.apache.spark.scheduler.Task.run(Task.scala:86) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/09/20 22:35:11 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NoSuchMethodException: scala.runtime.Nothing$.<init>(kafka.utils.VerifiableProperties) at java.lang.Class.getConstructor0(Class.java:3074) at java.lang.Class.getConstructor(Class.java:1817) at org.apache.spark.streaming.kafka.KafkaRDD$KafkaRDDIterator.<init>(KafkaRDD.scala:150) at org.apache.spark.streaming.kafka.KafkaRDD.compute(KafkaRDD.scala:136) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319) at org.apache.spark.rdd.RDD.iterator(RDD.scala:283) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) at org.apache.spark.scheduler.Task.run(Task.scala:86) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745)
问题分析:
spark-streaming-kafka 与 kafka 的版本不匹配:spark-streaming-kafka_2.10 1.6.0 依赖的是 kafka 0.8.2.1,而上面的 pom.xml 显式引入了 kafka 0.9.0.1。将 pom.xml 中 kafka 依赖的版本改为 0.8.2.1:
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming-kafka_2.10</artifactId>
  <version>1.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.kafka</groupId>
  <artifactId>kafka_2.10</artifactId>
  <version>0.8.2.1</version>
</dependency>