• spark读取kafka写入redis


    object PassengerFlowConsumerRedis {

      private val logger: Logger = Logger.getLogger(this.getClass)

      /**
       * Consumes records from Kafka with a Spark Streaming direct stream and
       * manages offsets manually in Redis: each partition's consumed position is
       * stored under key "<topic>_<partition>" so the job resumes from the last
       * committed offset after a restart.
       */
      def main(args: Array[String]): Unit = {

        val properties = PropertiesScalaUtils.loadProperties("kafka.properties")
        val kafkaParams = Map[String, Object](
          ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> properties.getProperty("kafka.bootstrap.servers"),
          ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
          ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
          ConsumerConfig.GROUP_ID_CONFIG -> "group-66",
          ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> properties.getProperty("kafka.auto.offset.reset"),
          // Offsets are committed manually to Redis, so Kafka auto-commit must stay off.
          ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))

        val conf = new SparkConf().setIfMissing("spark.master", "local[4]").setAppName("UserCountStat")

        val streamingContext = new StreamingContext(conf, Seconds(5))

        val redisHost = properties.getProperty("redis.host")
        val redisPort = properties.getProperty("redis.port")
        val redisTimeout = properties.getProperty("redis.timeout")
        val maxTotal = properties.getProperty("redis.maxTotal")
        val maxIdle = properties.getProperty("redis.maxIdle")
        val minIdle = properties.getProperty("redis.minIdle")

        JedisPoolUtils.makePool(redisHost, redisPort.toInt, redisTimeout.toInt, maxTotal.toInt, maxIdle.toInt, minIdle.toInt)

        // Topics this job consumes, with their expected partition counts
        // (offsets are tracked per topic-partition).
        val topicsPartition = Map("topic_1" -> 3, "topic_2" -> 4, "topic_3" -> 5)
        val topics = topicsPartition.keys.toList

        // Read the starting offsets on the driver. FIX: return the connection to
        // the pool when done — the original checked it out and never closed it.
        val jedis: Jedis = JedisPoolUtils.getPool.getResource
        val fromOffsets: Map[TopicPartition, Long] =
          try readOffsets(jedis, topicsPartition)
          finally jedis.close()

        val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
          streamingContext,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, fromOffsets)
        )

        // Process each micro-batch.
        kafkaStream.foreachRDD(rdd => {
          if (!rdd.isEmpty()) {
            // Offset ranges of the current batch, one entry per Kafka partition.
            val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

            rdd.foreachPartition(partition => {

              // Direct stream: Spark partition id maps 1:1 to the Kafka
              // partition's offset range for this batch.
              val offset: OffsetRange = offsetRanges(TaskContext.get.partitionId)
              logger.info(s"${offset.topic} ${offset.partition} ${offset.fromOffset} ${offset.untilOffset}")

              val jedisClient = JedisPoolUtils.getPool.getResource
              try {
                // jedisClient.select(1)

                val pipeline: Pipeline = jedisClient.pipelined()
                // Queue the offset update inside a MULTI/EXEC transaction.
                pipeline.multi()
                partition.foreach(record => {
                  // Per-record processing logic goes here.
                  println(record)
                  println(s"===============${record.topic()}_${record.partition()}====================")
                })
                // FIX: persist only THIS task's offset range. The original looped
                // over all offsetRanges in every task, so every partition's offset
                // was rewritten by every task (redundant writes, potential races).
                logger.info(s"topic: ${offset.topic} === partition: ${offset.partition} === fromOffset: ${offset.fromOffset} === untilOffset: ${offset.untilOffset}")
                val topic_partition_key = offset.topic + "_" + offset.partition
                pipeline.set(topic_partition_key, offset.untilOffset.toString)
                // Commit the transaction, then flush the pipelined commands.
                pipeline.exec()
                pipeline.sync()
              } finally {
                // FIX: always return the connection to the pool, even when
                // record processing throws.
                jedisClient.close()
              }
            })
          }
        })
        streamingContext.start()
        streamingContext.awaitTermination()
        streamingContext.stop()

        // Parsed representation of one pipe-delimited log line.
        case class MyRecord(topic: String, id: String, timestramp: String, uuid: String) extends Serializable

        // Parses all values of an RDD, drops failures, and collects the distinct
        // records to the driver. (Currently unused helper.)
        def processLogs(rdd: RDD[ConsumerRecord[String, String]]): Array[MyRecord] = {
          rdd.map(_.value()).flatMap(parseLog).filter(_ != null).distinct().collect()
        }

        // Parses one line into a MyRecord; returns None on malformed input.
        // Expected format: topic_1|43|2019-07-23 14:55:23.467|039702c7-0f8d-4ca1-9a01-e0fdd68c63de
        def parseLog(line: String): Option[MyRecord] = {
          // FIX: "\|" is an invalid escape sequence (compile error). A literal
          // '|' in a regex must be written "\\|". limit = -1 keeps trailing
          // empty fields.
          val ary: Array[String] = line.split("\\|", -1)
          try {
            Some(MyRecord(ary(0).trim, ary(1).trim, ary(2).trim, ary(3).trim))
          } catch {
            case e: Exception =>
              logger.error("解析错误", e)
              println(e.getMessage)
              None
          }
        }

      }

      /**
       * Reads the last saved offset of every partition of every topic from Redis.
       * Keys absent in Redis are initialized to "0" so a fresh deployment starts
       * from the beginning. On any Redis failure the process exits, since
       * starting without known offsets would reprocess or skip data.
       */
      def readOffsets(jedis: Jedis, topicsPartition: Map[String, Int]): Map[TopicPartition, Long] = {
        var fromOffsets: Map[TopicPartition, Long] = Map()
        try {
          // jedis.select(1)
          topicsPartition.foreach { case (topic, partitionCount) =>
            for (partition <- 0 until partitionCount) {
              val topic_partition_key = topic + "_" + partition
              if (!jedis.exists(topic_partition_key)) {
                jedis.set(topic_partition_key, "0")
              }
              val lastSavedOffset = jedis.get(topic_partition_key)
              logger.info(s"topic ${topic} partition ${partition} get lastSavedOffset from redis: ${lastSavedOffset}")
              fromOffsets += (new TopicPartition(topic, partition) -> lastSavedOffset.toLong)
            }
          }
        } catch {
          case e: Exception =>
            logger.error("readOffsets error ", e)
            System.exit(1)
        }
        fromOffsets
      }
    }
    诸业皆是自作自受,休咎祸福,尽从心生。
  • 相关阅读:
    一次sqlldr性能测试案例
    Java过滤特殊字符的正则表达式
    朝阳自行成交过户流程
    direct=true 错误处理方法 (关于 Oracle 的数据导入导出及 Sql Loader (sqlldr) 的用法
    SQLLDR直接加载几个参数的测试
    处理超出打开游标的最大数异常(ORA01000: maximum open cursors exceeded)
    java 用PreparedStatement来setDate,用问号的形式给日期问号赋值
    linux下的文件结构介绍
    用SQLLDR来装载CLOB/BLOB列的控制文件例子
    关于oracle日期的说明
  • 原文地址:https://www.cnblogs.com/1394htw/p/14478266.html
Copyright © 2020-2023  润新知