• Flume+Kafka+SparkStreaming+Hbase+可视化(四)---未全部完成


    打通实时数据处理
      1).流程图
     
    LoggerGenerator
    package Scala
    
    import java.time
    import org.apache.log4j.Logger
    
    /**
      * Endless synthetic log producer used to feed the log4j -> Flume -> Kafka pipeline.
      *
      * Every 100 ms it logs one INFO record of the form
      * `<date>\t<name>\t<value>`, where `date` is today's `LocalDate`,
      * `name` is drawn at random from a fixed list and `value` is a random
      * integer in `[0, 100)`.
      */
    object LoggerGenerator {

      /**
        * Runs the generator loop; never returns normally.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {

        val logger: Logger = Logger.getLogger(LoggerGenerator.getClass.getName)

        // Candidate names for the generated records.
        val nameList = List("Wade", "Marry", "Paul", "James", "Mike", "Tomas")

        // One generator for the whole run instead of two fresh instances per record.
        val random = new util.Random()

        while (true) {

          Thread.sleep(100)
          val date = time.LocalDate.now()
          // Bound by the list size so every name, including the last one, can be drawn
          // (a hard-coded bound of 5 silently excluded "Tomas").
          val name = nameList(random.nextInt(nameList.length))
          val value = random.nextInt(100)

          logger.info(s"$date\t$name\t$value")
        }
      }
    }
    

      

    log4j.properties

    # Root logger: INFO level, written to both the console ("stdout") and the Flume appender ("flume").
    log4j.rootLogger=INFO,stdout,flume
    
    # Console appender; fields in the pattern are tab-separated:
    # level, timestamp, logger name, thread name, message.
    log4j.appender.stdout= org.apache.log4j.ConsoleAppender
    log4j.appender.stdout.layout= org.apache.log4j.PatternLayout
    log4j.appender.stdout.layout.ConversionPattern=%p	%d{yyyy-MM-dd HH:mm:ss}	%c	[%t]	%m%n
    
    # Flume appender: forwards log events to the Flume agent listening on Hostname:Port.
    # Port 41414 is the same port the avro source binds to in flume.conf.
    # UnsafeMode = true: per the Flume user guide, the appender does not throw
    # when an event cannot be delivered.
    log4j.appender.flume = org.apache.flume.clients.log4jappender.Log4jAppender
    log4j.appender.flume.Hostname = localhost
    log4j.appender.flume.Port = 41414
    log4j.appender.flume.UnsafeMode = true
    

      

    flume.conf

    # Flume agent "log4jtoflume": receives log4j events over Avro, buffers them
    # in memory, and publishes them to a Kafka topic.
    # Name the components on this agent
    log4jtoflume.sources = avro-source
    log4jtoflume.channels = memory-channel
    log4jtoflume.sinks = kafka-sink
    
    # Source: Avro listener on localhost:41414 (the host/port the log4j Flume appender sends to)
    log4jtoflume.sources.avro-source.type = avro
    log4jtoflume.sources.avro-source.bind = localhost
    log4jtoflume.sources.avro-source.port = 41414
    
    # Channel: in-memory buffer (capacity = events held, transactionCapacity = events per transaction)
    log4jtoflume.channels.memory-channel.type = memory
    log4jtoflume.channels.memory-channel.capacity = 1000
    log4jtoflume.channels.memory-channel.transactionCapacity = 100
    
    # Sink: Kafka producer writing to topic "streamingTopic" on broker bigdata:9092
    # NOTE(review): topic/brokerList/batchSize look like the older KafkaSink property
    # names - confirm they are accepted by the Flume version in use.
    log4jtoflume.sinks.kafka-sink.type = org.apache.flume.sink.kafka.KafkaSink
    log4jtoflume.sinks.kafka-sink.topic = streamingTopic
    log4jtoflume.sinks.kafka-sink.brokerList =  bigdata:9092
    log4jtoflume.sinks.kafka-sink.batchSize = 20
    
    # Wiring: both the source and the sink are attached to the same memory channel
    log4jtoflume.sinks.kafka-sink.channel = memory-channel
    log4jtoflume.sources.avro-source.channels = memory-channel

    Kafka

    kafka-topics.sh --create --zookeeper bigdata:2181 --partitions 2 --replication-factor 1 --topic streamingTopic

    streaming code

    package Scala
    
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{StreamingContext,Seconds}
    import org.apache.spark.streaming.kafka010._
    
    /**
      * Spark Streaming job that subscribes to one or more Kafka topics and prints,
      * for every 5-second batch, the number of records received.
      *
      * Usage: `streamingCode <brokerList> <topics>` where `brokerList` is the Kafka
      * `bootstrap.servers` value and `topics` is a single topic name or a
      * comma-separated list of topic names.
      */
    object streamingCode {

      /**
        * Entry point.
        *
        * @param args exactly two elements: Kafka broker list and topic(s)
        */
      def main(args: Array[String]): Unit = {

        if (args.length != 2) {
          System.err.println("Usage: streamingCode <brokerList> <topics>")
          // Stop here: continuing would fail with a MatchError on the destructuring below.
          System.exit(1)
        }

        val Array(brokerList, topics) = args

        // Master is fixed to local[2], so the job runs locally with two threads.
        val conf = new SparkConf().setMaster("local[2]").setAppName("realTimeStreaming")
        val ssc = new StreamingContext(conf, Seconds(5))

        val kafkaParams = Map[String, Object](
          "bootstrap.servers" -> brokerList,
          "key.deserializer" -> classOf[StringDeserializer],
          "value.deserializer" -> classOf[StringDeserializer],
          "group.id" -> "group_id_1",
          "auto.offset.reset" -> "latest",
          "enable.auto.commit" -> (false: java.lang.Boolean)
        )

        // Accept "a,b,c" as well as a single topic name; blanks around commas are ignored.
        val topic = topics.split(",").map(_.trim).filter(_.nonEmpty)

        val streamLog4j = KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topic, kafkaParams)
        )

        // Print the per-batch record count.
        streamLog4j.map(record => record.value()).count().print()

        ssc.start()
        ssc.awaitTermination()

      }
    }
    

      

  • 相关阅读:
    python网络爬虫——scrapy核心组件介绍、请求传参、下载中间件
    python网络爬虫——Scrapy全站数据爬取【手动请求发送】及post请求的发送
    python网络爬虫——scrapy框架持久化存储
    php常见排序
    php实现快速排序
    mysql读写分离 主从同步
    php预定义字符
    本地Navicat连不上Linux虚拟机MySQL数据库问题
    php yii 命令
    yii 定义场景
  • 原文地址:https://www.cnblogs.com/mlxx9527/p/9668837.html
Copyright © 2020-2023  润新知