• Spark Streaming reading data from Kafka



    Maven dependencies

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
      <version>2.4.0-cdh6.3.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
      <version>2.4.0-cdh6.3.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.0.0-cdh6.3.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.0.0-cdh6.3.0</version>
    </dependency>

    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
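
    The CDH-suffixed artifact versions above are not published to Maven Central; they are normally resolved from Cloudera's public repository. A minimal sketch of the repository entry to add to the pom (the URL below is Cloudera's standard repo location and should be verified against your environment):

    <repositories>
      <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
      </repository>
    </repositories>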
    

    Code example

    import org.apache.log4j.{Level, Logger}
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
    import org.apache.spark.streaming.kafka010.KafkaUtils
    import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
    
    object SparkStreamingKafka {
      def main(args: Array[String]): Unit = {
        // Kerberos authentication (a sample jaas.conf is sketched after this block).
        // Backslashes in Windows paths must be escaped in Scala string literals.
        val JAAS = "D:\\sparkstreaming\\sparkstreaming_Test\\src\\main\\scala\\kerberos\\jaas.conf"
        val KRB5 = "D:\\sparkstreaming\\sparkstreaming_Test\\src\\main\\scala\\kerberos\\krb5.conf"
        System.setProperty("java.security.krb5.conf", KRB5)
        System.setProperty("java.security.auth.login.config", JAAS)

        Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
        Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
        Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.OFF)

        val conf = new SparkConf().setMaster("local[*]").setAppName("WordCount")

        // Consumer configuration
        val kafkaParams = Map[String, Object](
          "bootstrap.servers" -> "kafka05:30005", // Kafka broker address
          "key.deserializer" -> classOf[StringDeserializer],
          "value.deserializer" -> classOf[StringDeserializer], // must match the stream's value type below
          "group.id" -> "test", // consumer group id
          "auto.offset.reset" -> "earliest", // resume from a committed offset if one exists, otherwise start from the earliest record
          "security.protocol" -> "SASL_PLAINTEXT",
          "enable.auto.commit" -> (false: java.lang.Boolean), // if true, offsets are committed automatically in the background
          "sasl.kerberos.service.name" -> "kafka"
        )

        val topics = Array("zhctesttopic") // topics to subscribe to; several can be consumed at once

        val ssc = new StreamingContext(conf, Seconds(1)) // batch interval of 1 second
        ssc.sparkContext.setLogLevel("WARN")

        // Create the direct DStream that receives the input data
        val stream = KafkaUtils.createDirectStream[String, String](
          ssc,
          PreferConsistent,
          Subscribe[String, String](topics, kafkaParams))

        // Print each record's key and value, one 1-second batch at a time
        stream.map(record => (record.key, record.value)).foreachRDD(rdd => rdd.foreach(println))

        ssc.start()
        ssc.awaitTermination()
      }
    }
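
    The jaas.conf referenced at the top of main is not shown in the original post. A minimal sketch of what such a file typically looks like for keytab-based Kafka access; the keytab path and principal here are illustrative placeholders, not values from the original:

    KafkaClient {
      com.sun.security.auth.module.Krb5LoginModule required
      useKeyTab=true
      storeKey=true
      keyTab="D:/sparkstreaming/sparkstreaming_Test/src/main/scala/kerberos/user.keytab"
      principal="user@EXAMPLE.COM";
    };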
    
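    Since enable.auto.commit is set to false above, offsets are not committed unless the application does so itself. A minimal sketch of manual offset commits using the HasOffsetRanges/CanCommitOffsets API from spark-streaming-kafka-0-10, which would replace the simple foreachRDD above (the processing step is left abstract):

    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

    stream.foreachRDD { rdd =>
      // Offset ranges backing this batch, available only on the direct stream's RDDs
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // ... process the batch here ...
      // Commit the consumed offsets back to Kafka once processing has succeeded
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    Note that commitAsync is asynchronous, so a failure between processing and commit can still replay records; exactly-once semantics require storing offsets transactionally alongside the output.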
  • Original post: https://www.cnblogs.com/HarSenZhao/p/12360686.html