今天把项目所需的一些工具类进行封装,主要包括以下工具类:
- 配置文件加载工具
- ES相关工具
- Redis 工具
- kafka 工具
- Offset 维护工具
1、配置文件加载工具类
配置文件主要记录 Redis、Kafka 等相关组件的配置信息,需要有一个工具来对配置文件进行加载。
import java.io.InputStreamReader
import java.nio.charset.StandardCharsets
import java.util.Properties

/**
 * Loads configuration values from a properties file on the classpath.
 */
object MyPropertiesUtil {

  /** Manual smoke test: loads `config.properties` and prints the Kafka broker list. */
  def main(args: Array[String]): Unit = {
    val properties: Properties = MyPropertiesUtil.load("config.properties")
    println(properties.getProperty("kafka.broker.list"))
  }

  /**
   * Reads a UTF-8 encoded properties file through the current thread's context class loader.
   *
   * @param propertiesName classpath-relative resource name, e.g. `config.properties`
   * @return the parsed properties
   * @throws java.io.FileNotFoundException if the resource is not on the classpath
   */
  def load(propertiesName: String): Properties = {
    val stream =
      Thread.currentThread().getContextClassLoader.getResourceAsStream(propertiesName)
    // getResourceAsStream signals "not found" with null; fail with a message that names the
    // resource instead of letting InputStreamReader throw a bare NullPointerException.
    if (stream == null) {
      throw new java.io.FileNotFoundException(
        s"Properties resource '$propertiesName' was not found on the classpath")
    }
    val reader = new InputStreamReader(stream, StandardCharsets.UTF_8)
    try {
      val properties = new Properties()
      properties.load(reader)
      properties
    } finally {
      // Closing the reader also closes the underlying resource stream.
      reader.close()
    }
  }
}
2、Redis 工具类
这个小项目使用 Redis 的地方主要是利用 Redis 进行高效的日活去重,以及 Kafka 偏移量的维护等,工具类主要方法如下:
/**
 * Provides Jedis connections from a single, lazily created connection pool.
 */
object MyRedisUtil {

  // Shared pool; stays null until the first call to build() / getJedisClient().
  private var jedisPool: JedisPool = null

  /**
   * Creates the connection pool from `redis.host` / `redis.port` in `config.properties`.
   *
   * @throws IllegalArgumentException if either property is missing
   */
  def build(): Unit = synchronized {
    val config = MyPropertiesUtil.load("config.properties")
    val host: String = config.getProperty("redis.host")
    val port: String = config.getProperty("redis.port")
    // Fail with a message naming the keys rather than a NumberFormatException on a null port.
    require(
      host != null && port != null,
      "config.properties must define both redis.host and redis.port")

    val jedisPoolConfig = new JedisPoolConfig()
    jedisPoolConfig.setMaxTotal(100) // maximum number of connections
    jedisPoolConfig.setMaxIdle(20) // maximum idle connections
    jedisPoolConfig.setMinIdle(20) // minimum idle connections
    jedisPoolConfig.setBlockWhenExhausted(true) // wait when the pool is exhausted
    jedisPoolConfig.setMaxWaitMillis(10000) // how long to wait, in milliseconds
    jedisPoolConfig.setTestOnBorrow(true) // test each connection when it is borrowed

    jedisPool = new JedisPool(jedisPoolConfig, host, port.trim.toInt)
  }

  /**
   * Borrows a client from the pool, creating the pool on first use.
   *
   * The null check and the build run under the object's lock so that concurrent first
   * callers cannot each create (and then leak) their own pool.
   *
   * @return a pooled Jedis client; the caller must `close()` it to hand it back
   */
  def getJedisClient(): Jedis = {
    val pool = synchronized {
      if (jedisPool == null) {
        build()
      }
      jedisPool
    }
    pool.getResource
  }
}
3、Kafka工具类
import java.util.Properties

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

/**
 * Helpers for creating Kafka direct streams for Spark Streaming.
 *
 * All streams use `LocationStrategies.PreferConsistent`, which the Spark integration guide
 * describes as distributing partitions evenly across available executors. The guide
 * recommends `PreferBrokers` only when executors run on the same hosts as the Kafka brokers.
 */
object MyKafkaUtil {

  // Loaded once from config.properties on the classpath.
  val properties: Properties = MyPropertiesUtil.load("config.properties")
  val broker_list: String = properties.getProperty("kafka.broker.list")

  // Default Kafka consumer configuration shared by every stream created here.
  var kafkaParam = collection.mutable.Map(
    "bootstrap.servers" -> broker_list, // addresses used for the initial cluster connection
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    // Default consumer group, used when the caller does not pass one.
    "group.id" -> "gmall0429_group",
    // Start from the latest offset when there is no usable stored offset.
    "auto.offset.reset" -> "latest",
    // false: offsets are not auto-committed in the background and must be maintained manually.
    "enable.auto.commit" -> (false: java.lang.Boolean))

  /**
   * Returns a copy of the default parameters with `group.id` replaced.
   *
   * Working on a copy keeps one caller's group id out of the shared defaults, so it cannot
   * leak into streams created later by other callers.
   */
  private def paramsForGroup(groupId: String) = {
    val params = kafkaParam.clone()
    params("group.id") = groupId
    params
  }

  /**
   * Creates a direct stream on `topic` using the default consumer group.
   *
   * @param topic topic to subscribe to
   * @param ssc   streaming context that owns the stream
   */
  def getKafkaStream(topic: String, ssc: StreamingContext): InputDStream[ConsumerRecord[String, String]] = {
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam))
  }

  /**
   * Creates a direct stream on `topic` for the given consumer group.
   *
   * @param topic   topic to subscribe to
   * @param ssc     streaming context that owns the stream
   * @param groupId consumer group id used for this stream only
   */
  def getKafkaStream(topic: String, ssc: StreamingContext, groupId: String): InputDStream[ConsumerRecord[String, String]] = {
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), paramsForGroup(groupId)))
  }

  /**
   * Creates a direct stream on `topic` for the given consumer group, starting from
   * explicitly supplied offsets.
   *
   * @param topic   topic to subscribe to
   * @param ssc     streaming context that owns the stream
   * @param offsets starting offset per topic partition
   * @param groupId consumer group id used for this stream only
   */
  def getKafkaStream(topic: String, ssc: StreamingContext, offsets: Map[TopicPartition, Long], groupId: String): InputDStream[ConsumerRecord[String, String]] = {
    KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), paramsForGroup(groupId), offsets))
  }
}
4、ES 工具类
import java.util

import io.searchbox.client.config.HttpClientConfig
import io.searchbox.client.{JestClient, JestClientFactory}
import io.searchbox.core._
import org.elasticsearch.index.query.{BoolQueryBuilder, MatchQueryBuilder, TermQueryBuilder}
import org.elasticsearch.search.builder.SearchSourceBuilder
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder
import org.elasticsearch.search.sort.SortOrder
import org.wdh01.gmall.realtime.bean.DauInfo

/**
 * Elasticsearch helper built on the Jest client.
 *
 * Every operation obtains a client from a shared, lazily created factory and closes that
 * client when it finishes, including when the request throws.
 */
object MyESutil {

  // Shared Jest client factory; stays null until the first call to build() / getJestClient().
  private var jestFactory: JestClientFactory = null

  /** Creates the client factory and points it at the Elasticsearch endpoint. */
  def build(): Unit = synchronized {
    val factory = new JestClientFactory
    factory.setHttpClientConfig(
      new HttpClientConfig.Builder("http://hadoop201:9200") // ES endpoint
        .multiThreaded(true) // enable multi-threaded use
        .maxTotalConnection(200) // maximum number of connections
        .connTimeout(10000) // connection timeout
        .readTimeout(10000) // read timeout
        .build())
    jestFactory = factory
  }

  /**
   * Returns a Jest client, creating the factory on first use.
   *
   * The null check and the build run under the object's lock so that concurrent first
   * callers do not each create their own factory.
   */
  def getJestClient(): JestClient = {
    val factory = synchronized {
      if (jestFactory == null) {
        build()
      }
      jestFactory
    }
    factory.getObject
  }

  /** Runs `body` with a client and closes the client afterwards, even if `body` throws. */
  private def withClient[A](body: JestClient => A): A = {
    val client: JestClient = getJestClient()
    try body(client)
    finally client.close()
  }

  /** Prints the `_source` of every hit in `result`, one per line. */
  private def printHits(result: SearchResult): Unit = {
    import scala.collection.JavaConverters._
    val hits: util.List[SearchResult#Hit[util.Map[String, Any], Void]] =
      result.getHits(classOf[util.Map[String, Any]])
    // Convert the Java list to a Scala collection before extracting the sources.
    val sources: List[util.Map[String, Any]] = hits.asScala.map(_.source).toList
    println(sources.mkString("\n"))
  }

  /**
   * Saves a batch of documents with a single bulk request.
   *
   * Does nothing when the list is null or empty.
   *
   * @param dauInfolist (document id, document) pairs to index
   * @param indexName   target index
   */
  def bulkInsert(dauInfolist: List[(String, DauInfo)], indexName: String): Unit = {
    if (dauInfolist != null && dauInfolist.nonEmpty) {
      withClient { client =>
        val builder: Bulk.Builder = new Bulk.Builder()
        for ((id, dauInfo) <- dauInfolist) {
          val index: Index = new Index.Builder(dauInfo)
            .index(indexName)
            .id(id)
            .`type`("_doc")
            .build()
          builder.addAction(index)
        }
        val bulk: Bulk = builder.build()
        val bulkRes: BulkResult = client.execute(bulk)
        // Report how many items the bulk response contains.
        println("ES 插入 " + bulkRes.getItems.size() + " 条数据...")
      }
    }
  }

  /** Multi-document query, variant 2: the query JSON is built with SearchSourceBuilder. */
  def queryDos1(): Unit = withClient { client =>
    val boolQueryBuilder: BoolQueryBuilder = new BoolQueryBuilder()
    boolQueryBuilder.must(new MatchQueryBuilder("name", "人"))
    boolQueryBuilder.filter(new TermQueryBuilder("actorList.name", "殷桃"))

    val searchSourceBuilder: SearchSourceBuilder = new SearchSourceBuilder
    searchSourceBuilder.query(boolQueryBuilder)
    searchSourceBuilder.from(0)
    searchSourceBuilder.size(1)
    searchSourceBuilder.sort("doubanScore", SortOrder.DESC)
    searchSourceBuilder.highlighter(new HighlightBuilder().field("name"))
    val query: String = searchSourceBuilder.toString()

    val search: Search = new Search.Builder(query)
      .addIndex("movie_index")
      .build()
    val result: SearchResult = client.execute(search)
    printHits(result)
  }

  /** Multi-document query, variant 1: the query JSON is written by hand. */
  def queryDos(): Unit = withClient { client =>
    val query: String =
      """
        |{
        |  "query": {
        |    "match": {
        |      "name": "人"
        |    }
        |  },
        |  "sort": [
        |    {
        |      "doubanScore": {
        |        "order": "desc"
        |      }
        |    }
        |  ]
        |}
        |""".stripMargin

    val search: Search = new Search.Builder(query)
      .addIndex("movie_index")
      .build()
    val result: SearchResult = client.execute(search)
    printHits(result)
  }

  /** Fetches the document with id "5" from `movie_index` and prints the raw JSON response. */
  def queryById(): Unit = withClient { client =>
    val get: Get = new Get.Builder("movie_index", "5").build()
    val result: DocumentResult = client.execute(get)
    println(result.getJsonString)
  }

  /** Insert, variant 2: the document is a [[Movie]] case class instance. */
  def putIndex1(): Unit = withClient { client =>
    val actorMap = new util.HashMap[String, Object]()
    actorMap.put("id", "01")
    actorMap.put("name", "殷桃")
    val actorList = new util.ArrayList[util.Map[String, Object]]()
    actorList.add(actorMap)

    val movie: Movie = Movie(102, "人世间", 9.5f, actorList)
    val index: Index = new Index.Builder(movie)
      .index("movie_index")
      .`type`("movie")
      .id("5")
      .build()
    client.execute(index)
  }

  /** Insert, variant 1: the document is supplied as a JSON string. */
  def putIndex(): Unit = withClient { client =>
    val source: String =
      """{
        |  "id":101,
        |  "name":"peration meigong river",
        |  "doubanScore": 8.1,
        |  "actorList":
        |  [
        |    {"id":1,
        |     "name":"liu yi fei"
        |    }]
        |}""".stripMargin

    // Index.Builder takes the document to insert; a case class instance also works here,
    // as putIndex1 shows. Index is the action type passed to client.execute.
    val index: Index = new Index.Builder(source)
      .index("movie_index")
      .`type`("movie")
      .id("4")
      .build()
    client.execute(index)
  }

  /** Manual entry point; swap the call to try putIndex, putIndex1, queryById or queryDos. */
  def main(args: Array[String]): Unit = {
    queryDos1()
  }
}

/** Sample document type used by the insert and query examples. */
case class Movie(id: Long, name: String, doubanScore: Float, actorList: util.List[util.Map[String, Object]])
5、Offset 维护工具类
import java.util

import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.kafka010.OffsetRange
import redis.clients.jedis.Jedis

/**
 * Stores and restores Kafka consumer offsets in Redis.
 *
 * Offsets live in one Redis hash per (topic, group): field = partition id, value = offset.
 */
object OffsetManagerUtil {

  /**
   * Builds the Redis key for a topic / consumer-group pair.
   *
   * NOTE(review): the resulting key is `offset<topic>:<groupId>`, with no separator after
   * "offset". It is kept as is so that offsets already stored under this key stay readable.
   */
  private def offsetKey(topic: String, groupId: String): String =
    "offset" + topic + ":" + groupId

  /**
   * Reads the stored offsets for a topic and consumer group.
   *
   * @param topic   Kafka topic
   * @param groupid consumer group id
   * @return stored offset per partition, built from every field of the Redis hash
   */
  def getOffset(topic: String, groupid: String): Map[TopicPartition, Long] = {
    val jedis: Jedis = MyRedisUtil.getJedisClient()
    // Close the connection even when the Redis call fails.
    val stored: util.Map[String, String] =
      try jedis.hgetAll(offsetKey(topic, groupid))
      finally jedis.close()

    import scala.collection.JavaConverters._
    stored.asScala.map { case (partition, offset) =>
      (new TopicPartition(topic, partition.toInt), offset.toLong)
    }.toMap
  }

  /**
   * Saves the end offset (`untilOffset`) of every processed range to Redis.
   *
   * Does nothing when `offsetRanges` is null or empty, because Redis rejects HMSET without
   * any field/value pair.
   *
   * @param topic        Kafka topic
   * @param groupId      consumer group id
   * @param offsetRanges offset ranges of the batch that was just processed
   */
  def saveOffset(topic: String, groupId: String, offsetRanges: Array[OffsetRange]): Unit = {
    if (offsetRanges != null && offsetRanges.nonEmpty) {
      val offsetMap: util.HashMap[String, String] = new util.HashMap[String, String]()
      for (offsetRange <- offsetRanges) {
        val partitionID: Int = offsetRange.partition
        val fromOffset: Long = offsetRange.fromOffset
        val untilOffset: Long = offsetRange.untilOffset
        offsetMap.put(partitionID.toString, untilOffset.toString)
        // Progress output for debugging.
        println("保存分区:" + partitionID + " " + fromOffset + " -----> " + untilOffset)
      }

      val jedis: Jedis = MyRedisUtil.getJedisClient()
      // Close the connection even when the Redis call fails.
      try jedis.hmset(offsetKey(topic, groupId), offsetMap)
      finally jedis.close()
    }
  }
}