Encapsulating Utility Classes


    Today I packaged up the utility classes the project needs, mainly the following:

    1. Configuration file loading utility
    2. Redis utility
    3. Kafka utility
    4. ES utility
    5. Offset maintenance utility

    1. Configuration file loading utility

    The configuration file mainly holds the settings for Redis, Kafka and other components, so we need a utility to load it.

    import java.io.InputStreamReader
    import java.nio.charset.StandardCharsets
    import java.util.Properties
    
    /**
     * Reads configuration information from the config file
     */
    object MyPropertiesUtil {
      def main(args: Array[String]): Unit = {
        val properties: Properties = MyPropertiesUtil.load("config.properties")
        println(properties.getProperty("kafka.broker.list"))
      }
    
      def load(propertiesName: String): Properties = {
        val properties: Properties = new Properties()
        //load the specified config file from the classpath: after compilation, the files under
        //resources (e.g. config.properties) are copied under target, so the file can be loaded
        //through the current thread's context class loader
        properties.load(new InputStreamReader(
          Thread.currentThread().getContextClassLoader.getResourceAsStream(propertiesName), StandardCharsets.UTF_8))
        properties
      }
    }
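
    For reference, a minimal sketch of what config.properties (under src/main/resources) might contain. The host names and port below are placeholders for your own environment; the keys are the ones read by the utilities in this post:

    # Kafka brokers (placeholder host names)
    kafka.broker.list=hadoop201:9092,hadoop202:9092,hadoop203:9092
    # Redis connection (placeholder values)
    redis.host=hadoop201
    redis.port=6379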
    

    2. Redis utility

    This small project uses Redis for efficient daily-active-user (DAU) deduplication and for maintaining Kafka offsets. The main methods of the utility class are as follows:

    import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

    object MyRedisUtil {
      //declare the connection pool
      private var jedisPool: JedisPool = null

      //create the JedisPool
      def build() = {
        val config = MyPropertiesUtil.load("config.properties")

        val host: String = config.getProperty("redis.host")
        val port: String = config.getProperty("redis.port")

        val jedisPoolConfig = new JedisPoolConfig()
        jedisPoolConfig.setMaxTotal(100) //maximum number of connections
        jedisPoolConfig.setMaxIdle(20) //maximum idle connections
        jedisPoolConfig.setMinIdle(20) //minimum idle connections
        jedisPoolConfig.setBlockWhenExhausted(true) //whether to wait when the pool is exhausted
        jedisPoolConfig.setMaxWaitMillis(10000) //maximum wait time when exhausted, in milliseconds
        jedisPoolConfig.setTestOnBorrow(true) //validate each connection when it is borrowed

        jedisPool = new JedisPool(jedisPoolConfig, host, port.toInt)

      }

      //get a Jedis client
      def getJedisClient(): Jedis = {
        //build the pool first if it has not been created yet
        if (jedisPool == null) {
          build()
        }
        jedisPool.getResource
      }
    }
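
    As a quick illustration of the DAU deduplication mentioned above, here is a rough sketch of how the pool might be used. It is not part of the original utility; the "dau:" + date key layout, the mid (device id) field name and the 24-hour expiry are assumptions:

    //minimal sketch: record a device id (mid) for a given day in a Redis set and
    //report whether this is its first visit today; sadd returns 1 only for new members
    def isFirstVisitToday(mid: String, dt: String): Boolean = {
      val jedis = MyRedisUtil.getJedisClient()
      val dauKey = "dau:" + dt                  //assumed key layout, e.g. dau:2022-05-01
      val added: Long = jedis.sadd(dauKey, mid) //1 if mid was newly added, 0 if already present
      jedis.expire(dauKey, 3600 * 24)           //keep the set for one day
      jedis.close()
      added == 1L
    }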
    

    3. Kafka utility

    import java.util.Properties
    
    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.common.TopicPartition
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.spark.streaming.dstream.InputDStream
    import org.apache.spark.streaming.StreamingContext
    import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
    
    /**
     * Kafka reading utility
     */
    object MyKafkaUtil {
    
      //load the configuration file through the utility class
      val properties: Properties = MyPropertiesUtil.load("config.properties")
      val broker_list: String = properties.getProperty("kafka.broker.list")
      // Kafka consumer configuration
      var kafkaParam = collection.mutable.Map(
        "bootstrap.servers" -> broker_list, //addresses used to bootstrap the connection to the cluster
        "key.deserializer" -> classOf[StringDeserializer],
        "value.deserializer" -> classOf[StringDeserializer],
        //identifies which consumer group this consumer belongs to
        "group.id" -> "gmall0429_group",
        //"latest" resets the offset to the latest offset when no committed offset exists
        "auto.offset.reset" -> "latest",
        //if true, the consumer's offsets are committed automatically in the background, which can lose data if Kafka goes down
        //if false, offsets have to be maintained manually
        "enable.auto.commit" -> (false: java.lang.Boolean))


      // create a DStream that returns the received input data
      def getKafkaStream(topic: String, ssc: StreamingContext): InputDStream[ConsumerRecord[String, String]] = {
        val dStream = KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam)
        )
        dStream
      }
    
      def getKafkaStream(topic: String, ssc: StreamingContext, groupId: String): InputDStream[ConsumerRecord[String, String]] = {
        kafkaParam("group.id") = groupId
        val dStream = KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam))
        dStream
      }
    
      def getKafkaStream(topic: String, ssc: StreamingContext, offsets: Map[TopicPartition, Long], groupId: String): InputDStream[ConsumerRecord[String, String]] = {
        kafkaParam("group.id") = groupId
        val dStream = KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam, offsets))
        dStream
      }
    
      /**
       * LocationStrategies: location strategies
       * PreferConsistent: the strategy used in most cases; partitions are distributed evenly across the available executors
       * PreferBrokers: only use this when the executors run on the same nodes as the Kafka brokers
       */
    }
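
    A minimal sketch of how getKafkaStream might be wired into a Spark Streaming driver. The topic name "gmall_start_0429", the group id "demo_group" and the 5-second batch interval are placeholders, not values from the original project:

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    //hypothetical driver, only to show how the utility is called
    object KafkaReadDemo {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[4]").setAppName("KafkaReadDemo")
        val ssc = new StreamingContext(conf, Seconds(5))

        val recordDStream = MyKafkaUtil.getKafkaStream("gmall_start_0429", ssc, "demo_group")
        recordDStream.map(_.value()).print()

        ssc.start()
        ssc.awaitTermination()
      }
    }

    The offset-aware overload (the one taking Map[TopicPartition, Long]) is shown together with OffsetManagerUtil in section 5 below.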
    

    4. ES utility

    import java.util
    
    import io.searchbox.client.config.HttpClientConfig
    import io.searchbox.client.{JestClient, JestClientFactory}
    import io.searchbox.core._
    import org.elasticsearch.index.query.{BoolQueryBuilder, MatchQueryBuilder, TermQueryBuilder}
    import org.elasticsearch.search.builder.SearchSourceBuilder
    import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder
    import org.elasticsearch.search.sort.SortOrder
    import org.wdh01.gmall.realtime.bean.DauInfo
    
    
    /**
     * ES operations utility (via the Jest client)
     */
    object MyESutil {
      //declare the Jest client factory
      private var jestFactory: JestClientFactory = null
    
      def build() = {
        jestFactory = new JestClientFactory
        jestFactory.setHttpClientConfig(new HttpClientConfig
        .Builder("http://hadoop201:9200") //ES connection address
          .multiThreaded(true) //enable multi-threaded handling
          .maxTotalConnection(200) //maximum number of connections
          .connTimeout(10000) //connection timeout (ms)
          .readTimeout(10000) //read timeout (ms)
          .build()
        )
      }
    
      //get the client
      def getJestClient(): JestClient = {
        //if the client factory is null, call build() to create it; otherwise reuse the existing one
        if (jestFactory == null) {
          //create the client factory
          build()
        }
        jestFactory.getObject
      }
    
      /**
       * Bulk save
       *
       * @param dauInfolist list of (document id, DauInfo) pairs to save
       * @param indexName   name of the target index
       */
      def bulkInsert(dauInfolist: List[(String, DauInfo)], indexName: String) = {
        //first check whether the current batch contains any data
        if (dauInfolist != null && dauInfolist.size > 0) {
          //get a client connection
          val client: JestClient = getJestClient()
          val builder: Bulk.Builder = new Bulk.Builder()
          for ((id, dauInfo) <- dauInfolist) {
            val index: Index = new Index.Builder(dauInfo)
              .index(indexName)
              .id(id)
              .`type`("_doc")
              .build()
            builder.addAction(index)
          }
          //build the bulk request
          val bulk: Bulk = builder.build()
          val bulkRes: BulkResult = client.execute(bulk)
          //print how many records were saved to ES
          println("Inserted " + bulkRes.getItems.size() + " records into ES...")
          //release the client
          client.close()
        }
      }
    
      //   query multiple documents, approach 2: build the query with SearchSourceBuilder
      def queryDos1() = {
        //get a client connection
        val client: JestClient = getJestClient()
        //used to build the query JSON string
        val searchSourceBuilder: SearchSourceBuilder = new SearchSourceBuilder
        val boolQueryBuilder: BoolQueryBuilder = new BoolQueryBuilder()
        boolQueryBuilder.must(new MatchQueryBuilder("name", "人"))
        boolQueryBuilder.filter(new TermQueryBuilder("actorList.name", "殷桃"))
    
        searchSourceBuilder.query(boolQueryBuilder)
        searchSourceBuilder.from(0)
        searchSourceBuilder.size(1)
        searchSourceBuilder.sort("doubanScore", SortOrder.DESC)
    
        searchSourceBuilder.highlighter(new HighlightBuilder().field("name"))
    
        val query: String = searchSourceBuilder.toString()
        //  println(query)
    
        //wrap into a Search object
        val search: Search = new Search.Builder(query)
          .addIndex("movie_index")
          .build()
        val result: SearchResult = client.execute(search)
        val list: util.List[SearchResult#Hit[util.Map[String, Any], Void]] = result.getHits(classOf[util.Map[String, Any]])
        //convert the Java list to a Scala collection
        import scala.collection.JavaConverters._
        val list1: List[util.Map[String, Any]] = list.asScala.map(_.source).toList
        println(list1.mkString("\n"))
    
        //close the connection
        client.close()
      }
    
      //   query multiple documents, approach 1: pass the query as a raw JSON string
      def queryDos() = {
        //get a client connection
        val client: JestClient = getJestClient()
        val query: String =
          """
            |{
            |  "query": {
            |    "match": {
            |      "name": "人"
            |    }
            |  },
            |  "sort": [
            |    {
            |      "doubanScore": {
            |        "order": "desc"
            |      }
            |    }
            |  ]
            |}
            |""".stripMargin
        //wrap into a Search object
        val search: Search = new Search.Builder(query)
          .addIndex("movie_index")
          .build()
        val result: SearchResult = client.execute(search)
        val list: util.List[SearchResult#Hit[util.Map[String, Any], Void]] = result.getHits(classOf[util.Map[String, Any]])
        //convert the Java list to a Scala collection
        import scala.collection.JavaConverters._
        val list1: List[util.Map[String, Any]] = list.asScala.map(_.source).toList
        println(list1.mkString("\n"))
    
        //close the connection
        client.close()
      }
    
      // query a document by ID
      def queryById() = {
        //get a client connection
        val client: JestClient = getJestClient()
        //specify the index & id to query
        val get: Get = new Get.Builder("movie_index", "5").build()
        //get the result object
        val result: DocumentResult = client.execute(get)
        //print the result directly via its getJsonString method
        println(result.getJsonString)
        //close the connection
        client.close()
      }
    
      // ES insert, approach 2: wrap the document in a case class object
      def putIndex1() = {
        //get a client connection
        val client: JestClient = getJestClient()
        val actorList = new util.ArrayList[util.Map[String, Object]]()
        val actorMap = new util.HashMap[String, Object]()
        actorMap.put("id", "01")
        actorMap.put("name", "殷桃")
        actorList.add(actorMap)
        //wrap into a case class object
        val movie: Movie = Movie(102, "人世间", 9.5f, actorList)

        //create the Action implementation Index
        val index: Index = new Index.Builder(movie) //pass in the case class object
          .index("movie_index")
          .`type`("movie")
          .id("5")
          .build()
    
    
        client.execute(index)
    
        //close the connection
        client.close()
      }
    
      // ES insert, approach 1: pass the document as a raw JSON string
      def putIndex() = {
        //get a client connection
        val client: JestClient = getJestClient()
        //define the source document to insert
        val source: String =
          """{
            |  "id":101,
            |  "name":"peration meigong river",
            |     "doubanScore": 8.1,
            |   "actorList":
            |   [
            |     {"id":1,
            |       "name":"liu yi fei"
            |     }]
            |}""".stripMargin
        //create the Index to insert; the Builder argument is the document to insert, converted to JSON under the hood (a case class object can also be passed in)
        val index: Index = new Index.Builder(source)
          .index("movie_index")
          .`type`("movie")
          .id("4")
          .build()
    
        //use the client to operate on ES; execute takes an Action, and Index is an implementation of Action
        client.execute(index)
    
        //close the connection
        client.close()
      }
    
      def main(args: Array[String]): Unit = {
        //putIndex()
        //putIndex1()
        //queryById()
        //queryDos()
        queryDos1()
      }
    }
    
    //case class describing a movie document
    case class Movie(id: Long, name: String, doubanScore: Float, actorList: util.List[util.Map[String, Object]])
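
    For context, a rough sketch of how bulkInsert would typically be driven from the streaming job. The dauInfoDStream of (document id, DauInfo) pairs produced upstream and the "gmall_dau_info_" index prefix are assumptions for illustration, not part of the original post:

    import org.apache.spark.streaming.dstream.DStream
    import org.wdh01.gmall.realtime.bean.DauInfo

    //minimal sketch: flush each partition of every batch to ES in one bulk request
    def saveDauToEs(dauInfoDStream: DStream[(String, DauInfo)], dt: String): Unit = {
      dauInfoDStream.foreachRDD { rdd =>
        rdd.foreachPartition { dauInfoIter =>
          val docList: List[(String, DauInfo)] = dauInfoIter.toList
          if (docList.nonEmpty) {
            MyESutil.bulkInsert(docList, "gmall_dau_info_" + dt) //assumed index naming pattern
          }
        }
      }
    }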
    

    5. Offset maintenance utility

    import java.util
    
    import org.apache.kafka.common.TopicPartition
    import org.apache.spark.streaming.kafka010.OffsetRange
    import redis.clients.jedis.Jedis
    
    /**
     * Offset maintenance
     */
    object OffsetManagerUtil {


      //read offsets from Redis; Redis type: Hash, key: offset:topic:groupid, field: partition, value: offset
      def getOffset(topic: String, groupid: String): Map[TopicPartition, Long] = {
        //get a Jedis connection
        val jedis: Jedis = MyRedisUtil.getJedisClient()
        //build the key: offset:topic:groupid
        var offsetKey = "offset:" + topic + ":" + groupid
        //get the partitions and offsets saved for this consumer group
        val offsetMap: util.Map[String, String] = jedis.hgetAll(offsetKey)
        //close the connection
        jedis.close()
        //convert the Java map to a Scala map, which is more convenient to work with
        import scala.collection.JavaConverters._
        offsetMap.asScala.map {
          case (partition, offset) => {
            // Map[TopicPartition, Long]
            (new TopicPartition(topic, partition.toInt), offset.toLong)
          }
        }.toMap
      }
    
      //save offsets to Redis
      def saveOffset(topic: String, groupId: String, offsetRanges: Array[OffsetRange]): Unit = {
        //get a Jedis connection
        val jedis: Jedis = MyRedisUtil.getJedisClient()
        //build the key: offset:topic:groupid
        var offsetKey = "offset:" + topic + ":" + groupId
        //define a Java map to hold the offsets
        val offsetMap: util.HashMap[String, String] = new util.HashMap[String, String]()
        //iterate over the OffsetRanges and fill offsetMap
        for (offsetRange <- offsetRanges) {
          val partitionID: Int = offsetRange.partition
          val fromOffset: Long = offsetRange.fromOffset
          val untilOffset: Long = offsetRange.untilOffset
          offsetMap.put(partitionID.toString, untilOffset.toString)
          //print for debugging
          println("Saving partition " + partitionID + ": " + fromOffset + " -----> " + untilOffset)
        }
        //save the offsets
        jedis.hmset(offsetKey, offsetMap)
        //close the connection
        jedis.close()
      }
    }
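
    Putting MyKafkaUtil and OffsetManagerUtil together, the usual pattern is: read the saved offsets before creating the stream, capture each batch's OffsetRanges with a transform, and call saveOffset only after the batch has been processed. A minimal sketch follows; the topic, group id and batch interval are placeholders, and the per-record println stands in for the real processing:

    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.common.TopicPartition
    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.dstream.{DStream, InputDStream}
    import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    //hypothetical driver showing the offset round-trip; not part of the original post
    object OffsetDemo {
      def main(args: Array[String]): Unit = {
        val ssc = new StreamingContext(new SparkConf().setMaster("local[4]").setAppName("OffsetDemo"), Seconds(5))
        val topic = "gmall_start_0429" //placeholder topic
        val groupId = "dau_group"      //placeholder consumer group

        //1. resume from the offsets saved in Redis, if any
        val offsets: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(topic, groupId)
        val recordDStream: InputDStream[ConsumerRecord[String, String]] =
          if (offsets != null && offsets.nonEmpty) MyKafkaUtil.getKafkaStream(topic, ssc, offsets, groupId)
          else MyKafkaUtil.getKafkaStream(topic, ssc, groupId)

        //2. grab each batch's OffsetRanges from the KafkaRDD before any transformation loses them
        var offsetRanges: Array[OffsetRange] = Array.empty
        val withOffsets: DStream[ConsumerRecord[String, String]] = recordDStream.transform { rdd =>
          offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
          rdd
        }

        //3. process the batch, then commit the offsets to Redis on the driver
        withOffsets.foreachRDD { rdd =>
          rdd.foreach(record => println(record.value())) //placeholder processing
          OffsetManagerUtil.saveOffset(topic, groupId, offsetRanges)
        }

        ssc.start()
        ssc.awaitTermination()
      }
    }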