• sparkStreaming获取kafka数据(java版本)


    环境

    spark-2.2.0

    kafka_2.11-0.10.0.1

    jdk1.8

    配置好jdk,创建项目并将kafka和spark的jar包添加到项目中,除此之外还需要添加spark-streaming-kafka-*****.jar,笔者这里用的是spark-streaming-kafka-0-10_2.11-2.2.0.jar,可在spark官网上自行下载

     1 import java.util.Arrays;
     2 import java.util.Collection;
     3 import java.util.HashMap;
     4 import java.util.Map;
     5 
     6 import org.apache.kafka.clients.consumer.ConsumerRecord;
     7 import org.apache.kafka.common.serialization.StringDeserializer;
     8 import org.apache.spark.SparkConf;
     9 import org.apache.spark.api.java.JavaPairRDD;
    10 import org.apache.spark.api.java.JavaRDD;
    11 import org.apache.spark.api.java.function.Function;
    12 import org.apache.spark.api.java.function.PairFunction;
    13 import org.apache.spark.api.java.function.VoidFunction;
    14 import org.apache.spark.rdd.RDD;
    15 import org.apache.spark.streaming.Durations;
    16 import org.apache.spark.streaming.api.java.JavaDStream;
    17 import org.apache.spark.streaming.api.java.JavaInputDStream;
    18 import org.apache.spark.streaming.api.java.JavaPairDStream;
    19 import org.apache.spark.streaming.api.java.JavaStreamingContext;
    20 import org.apache.spark.streaming.kafka010.ConsumerStrategies;
    21 import org.apache.spark.streaming.kafka010.KafkaUtils;
    22 import org.apache.spark.streaming.kafka010.LocationStrategies;
    23 
    24 import scala.Tuple2;
    25 
    26 public class SparkStreamingFromkafka {
    27 
    28     public static void main(String[] args) throws Exception {
    29         // TODO Auto-generated method stub
    30         SparkConf sparkConf  = new SparkConf().setMaster("local[*]").setAppName("SparkStreamingFromkafka");
    31         JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf , Durations.seconds(1));
    32         
    33         Map<String, Object> kafkaParams = new HashMap<>();
    34         kafkaParams.put("bootstrap.servers", "192.168.246.134:9092");//多个可用ip可用","隔开
    35         kafkaParams.put("key.deserializer", StringDeserializer.class);
    36         kafkaParams.put("value.deserializer", StringDeserializer.class);
    37         kafkaParams.put("group.id", "sparkStreaming");
    38         Collection<String> topics = Arrays.asList("video");//配置topic,可以是数组
    39 
    40         JavaInputDStream<ConsumerRecord<String, String>> javaInputDStream =KafkaUtils.createDirectStream(
    41                 streamingContext, 
    42                 LocationStrategies.PreferConsistent(), 
    43                 ConsumerStrategies.Subscribe(topics, kafkaParams));
    44     
    45         JavaPairDStream<String, String> javaPairDStream = javaInputDStream.mapToPair(new PairFunction<ConsumerRecord<String, String>, String, String>(){
    46             private static final long serialVersionUID = 1L;
    47             @Override
    48             public Tuple2<String, String> call(ConsumerRecord<String, String> consumerRecord) throws Exception {
    49                 return new Tuple2<>(consumerRecord.key(), consumerRecord.value());
    50             }
    51         });
    52         javaPairDStream.foreachRDD(new VoidFunction<JavaPairRDD<String,String>>() {
    53             @Override
    54             public void call(JavaPairRDD<String, String> javaPairRDD) throws Exception {
    55                 // TODO Auto-generated method stub
    56                 javaPairRDD.foreach(new VoidFunction<Tuple2<String,String>>() {
    57                     @Override
    58                     public void call(Tuple2<String, String> tuple2)
    59                             throws Exception {
    60                         // TODO Auto-generated method stub
    61                         System.out.println(tuple2._2);
    62                     }
    63                 });
    64             }
    65         });
    66         streamingContext.start();
    67         streamingContext.awaitTermination();
    68     }
    69 
    70 }
  • 相关阅读:
    Coursera 算法二 week 5 BurrowsWheeler
    pta 编程题7 List Leaves
    pta 编程题6 树的同构
    pta编程题5 Pop Sequence
    pat乙级1067
    pat乙级1060
    pat乙级1059
    1.ActionBar
    安卓开发必须收藏的网站
    genymotion常见问题解答
  • 原文地址:https://www.cnblogs.com/timeTraveler/p/9869694.html
Copyright © 2020-2023  润新知