• Flink (5): Consuming Kafka


    0. Purpose

    Test the startup-offset strategies available when Flink consumes from Kafka:

    kafkaSource.setStartFromEarliest()               // start from the earliest offset
    kafkaSource.setStartFromLatest()                 // start from the latest offset
    kafkaSource.setStartFromTimestamp(startMs)       // start from a given epoch-millis timestamp
    kafkaSource.setStartFromGroupOffsets()           // start from committed group offsets (the default)
    kafkaSource.setStartFromSpecificOffsets(offsets) // start from explicit per-partition offsets (sketch below)
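
    setStartFromSpecificOffsets takes a java.util.Map from Kafka partition to starting offset. A minimal sketch of building that map (the partition and offset values are placeholders for illustration, not from the original post):

    import java.{lang, util}
    import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition

    // Start partition 0 of the topic at offset 42 (placeholder values).
    val specificOffsets = new util.HashMap[KafkaTopicPartition, lang.Long]()
    specificOffsets.put(new KafkaTopicPartition("xes_test_anwser_detail", 0), 42L)
    kafkaSource.setStartFromSpecificOffsets(specificOffsets)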
    

    1. Local test

    package flink_01_connector.source
    
    import java.util.Properties
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
    import scala.collection.JavaConverters._
    
    /**
     * @description: kafka connector
     * @author: HaoWu
     * @create: 2020-12-16
     */
    object KafkaConnectorTest {
      def main(args: Array[String]): Unit = {
        // 0 Initialize the environment
        val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)
    
        // 1 Build the Kafka source
        val topics = List("xes_test_anwser_detail").asJava
        val props = new Properties()
        props.put("bootstrap.servers", "kafka address") // placeholder: your broker list
        props.put("group.id", "test5")
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") // key deserializer
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") // value deserializer

        /*  props.put("enable.auto.commit", "true") // auto-commit offsets
            props.put("auto.commit.interval.ms", "1500") // commit interval
            props.put("auto.offset.reset", "latest") // read from the latest offset */
        val kafkaSource = new FlinkKafkaConsumer011[String](topics, new SimpleStringSchema(), props)

        val lag = System.currentTimeMillis() - 24 * 3600 * 1000L // 24 hours in millis (Long literal avoids Int overflow)
        kafkaSource.setStartFromTimestamp(lag) // start consuming from 24 hours ago
    
        // 2 Create the stream
        val kafkaStream: DataStream[String] = env.addSource(kafkaSource)
    
        // 3 Print
        kafkaStream.print()
    
        // 4 Execute
        env.execute()
      }
    }
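
    A note on offsets with this legacy consumer: once checkpointing is enabled, FlinkKafkaConsumer011 commits offsets back to Kafka only when a checkpoint completes, and the commented-out enable.auto.commit / auto.commit.interval.ms properties above are ignored. A minimal sketch, reusing the env and kafkaSource values from the program above:

    // Checkpoint every 60 s; offsets are then committed on checkpoint completion.
    env.enableCheckpointing(60 * 1000)
    // Explicit opt-in (this is already the default once checkpointing is on).
    kafkaSource.setCommitOffsetsOnCheckpoints(true)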
    
    

    2. Online test

    package flink_01_connector.source
    
    import java.util.Properties
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.api.java.utils.ParameterTool
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
    import scala.collection.JavaConverters._
    
    /**
     * @description: read a kafka stream
     * @author: HaoWu
     * @create: 2020-12-16
     */
    object KafkaConnectorOnlineTest {
      def main(args: Array[String]): Unit = {
        // 0 Initialize the environment
        val parameterTool: ParameterTool = ParameterTool.fromArgs(args)
        val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)
        val topic: String = parameterTool.get("topic") // topic:可设置多个,逗号分隔
        val bootStrapServer: String = parameterTool.get("bootstrap_server") // kafka集群url
        val groupId: String = parameterTool.get("group_id") // 消费者组
        val hours: Int = parameterTool.get("hours").toInt // 从几小时前开始消费
    
        //    val keyTabPath = parameterTool.get("keytab_path") // 安全验证
        //    env.registerCachedFile(keyTabPath, "keytab")
        // 设置全局参数
        env.getConfig.setGlobalJobParameters(parameterTool)
        // 1 构建 Kafka Source
        val topics = topic.split(",").toList.asJava
        val props = new Properties()
        props.put("bootstrap.servers", bootStrapServer)
        props.put("group.id", groupId)
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") //key 反序列化
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") //value 反序列化
        val kafkaSource = new FlinkKafkaConsumer011[String](topics, new SimpleStringSchema(), props)
    
    
        val lag = System.currentTimeMillis() - hours * 3600 * 1000L // Long arithmetic avoids Int overflow for large hours
        kafkaSource.setStartFromTimestamp(lag) // start consuming `hours` hours back
        //    kafkaSource.setStartFromEarliest() // consume from the earliest offset
        // 2 Create the stream
        val kafkaStream: DataStream[String] = env.addSource(kafkaSource)
    
        // 3 Print
        kafkaStream.print("| log |")
    
        // 4 Execute
        env.execute()
      }
    }
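
    Because the job registers parameterTool via setGlobalJobParameters, any rich function can read those arguments at runtime instead of receiving them through constructors. A minimal sketch of that pattern (the EnrichFn class is hypothetical, for illustration only):

    import org.apache.flink.api.common.functions.RichMapFunction
    import org.apache.flink.api.java.utils.ParameterTool
    import org.apache.flink.configuration.Configuration

    // Hypothetical map function: tags each record with the consumer group id
    // read from the globally registered ParameterTool.
    class EnrichFn extends RichMapFunction[String, String] {
      @transient private var groupId: String = _

      override def open(parameters: Configuration): Unit = {
        val params = getRuntimeContext.getExecutionConfig
          .getGlobalJobParameters.asInstanceOf[ParameterTool]
        groupId = params.get("group_id")
      }

      override def map(value: String): String = s"[$groupId] $value"
    }

    // Usage: kafkaStream.map(new EnrichFn).print()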
    

    Submitting the job

    #!/bin/bash
    
    source  ~/.bashrc
    
    cd $(dirname $0)
    day=$(date +%Y%m%d%H%M)
    
    #flink 
    jobName=KafkaConnectorOnlineTest_wuhao
    clazz=flink_01_connector.source.KafkaConnectorOnlineTest
    jar_path=/home//wuhao/flink-learning/jar/02_flink_learning-1.0-SNAPSHOT-jar-with-dependencies.jar
    parallelism=2
    sourceParallelism=4
    
    #kafka  bootstrap_server
    bootstrap_server="kafka url"  # placeholder: broker list
    topic=xes_test_anwser_detail
    group_id=KafkaConnectorOnlineTest_wuhao
    hours=24
    
    #kudu
    kudu_instance=1v6_common_edc_online_answer
    kudu_host=****:7051
    kudu_flush_num=5
    
    #-----------------------run----------------------------------------------
    /software/servers/flink1.9.1_wx_dp_hive/bin/flink run -m yarn-cluster \
    -ynm ${jobName} \
    -yqu root.wangxiao.dp \
    -c ${clazz} ${jar_path} \
    --jobName ${jobName} \
    --keytab_path /home/wx_dp_hive/wx_dp_hive.keytab \
    --bootstrap_server ${bootstrap_server} \
    --topic ${topic} \
    --group_id ${group_id} \
    --hours ${hours} \
    --parallelism ${parallelism} \
    --sourceParallelism ${sourceParallelism} \
    --kudu_instance ${kudu_instance} \
    --kudu_host ${kudu_host} \
    --kudu_flush_num ${kudu_flush_num} >../logs/${jobName}_${day}.log 2>&1 &
                                       
    
  • Original post: https://www.cnblogs.com/wh984763176/p/14148822.html