• Flink实例(106):自定义时间和窗口的操作符(十二)自定义窗口分配器 周、月


    自定义 WindowAssigner

    如果我们定义按天、小时、分钟的滚动窗口都很容易实现。

    但是如果我们要定义一周(从周日或周一开始)、一个月(从 1 号开始)的滚动窗口,那么现有 API 基本无法实现或很难实现。

    对此就需要我们实现一个自定义的窗口分配器。

    package com.atguigu.exercise.ETLHIVE
    
    import java.text.SimpleDateFormat
    import java.util
    import java.util.{Calendar, Collections, Date}
    
    import com.meda.utils.DateHelper
    import org.apache.flink.api.common.ExecutionConfig
    import org.apache.flink.api.common.typeutils.TypeSerializer
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
    import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner
    import org.apache.flink.streaming.api.windowing.triggers.{EventTimeTrigger, Trigger}
    import org.apache.flink.streaming.api.windowing.windows.TimeWindow
    
    /**
     * Event-time window assigner producing tumbling calendar windows: either one window per
     * calendar month (starting on the 1st) or one per calendar week (starting on Sunday).
     *
     * Calendar boundaries are computed with `Calendar.getInstance()`, i.e. in the JVM's default
     * time zone; the numeric examples in the method docs below assume UTC+8.
     *
     * @param tag window granularity: "month" or "week"
     * @tparam T type of the stream elements
     * @throws IllegalArgumentException if `tag` is neither "month" nor "week"
     */
    class CustomWindowAssigner [T](tag: String) extends WindowAssigner[T, TimeWindow]{
      // Fail at job-construction time instead of with a MatchError on the first element.
      require(tag == "month" || tag == "week", s"Unsupported window tag '$tag', expected 'month' or 'week'")

      /**
       * Assigns each element to exactly one window: the calendar month or week that contains
       * `timestamp`.
       */
      override def assignWindows(t: T, timestamp: Long, windowAssignerContext: WindowAssigner.WindowAssignerContext): util.Collection[TimeWindow] = {
        val (start, inclusiveEnd) = tag match {
          case "month" => getTimestampFromMon(timestamp)
          case "week" => getTimestampFromWeek(timestamp)
          case other => throw new IllegalArgumentException(s"Unsupported window tag '$other', expected 'month' or 'week'")
        }
        // TimeWindow's end is exclusive (maxTimestamp == end - 1), while the helpers return the
        // last millisecond that still belongs to the period, hence the + 1.
        Collections.singletonList(new TimeWindow(start, inclusiveEnd + 1))
      }

      // The cast is required in Scala: EventTimeTrigger.create() is typed as
      // Trigger[Object, TimeWindow] and would not compile against Trigger[T, TimeWindow] otherwise.
      override def getDefaultTrigger(streamExecutionEnvironment: StreamExecutionEnvironment): Trigger[T, TimeWindow] = EventTimeTrigger.create().asInstanceOf[Trigger[T, TimeWindow]]

      override def getWindowSerializer(executionConfig: ExecutionConfig): TypeSerializer[TimeWindow] = new TimeWindow.Serializer

      // Windows are assigned based on event time.
      override def isEventTime: Boolean = true

      /**
       * Returns the range of the calendar month containing `timestamp`.
       * e.g. 2020-03-12 11:35:13 (timestamp=1583984113960L) in UTC+8
       * yields (1582992000000,1585670399999) => (2020-03-01 00:00:00.000, 2020-03-31 23:59:59.999)
       *
       * @param timestamp epoch milliseconds
       * @return (first millisecond of the month, last millisecond of the month), both inclusive
       */
      def getTimestampFromMon(timestamp: Long): (Long, Long) = {
        val calendar = Calendar.getInstance()
        calendar.setTimeInMillis(timestamp)
        calendar.set(Calendar.DAY_OF_MONTH, 1)
        truncateToMidnight(calendar)
        val start: Long = calendar.getTimeInMillis
        // Calendar arithmetic instead of days * 24h, so months containing a DST change are exact.
        calendar.add(Calendar.MONTH, 1)
        (start, calendar.getTimeInMillis - 1)
      }

      /**
       * Returns the range of the week (Sunday through Saturday) containing `timestamp`.
       * e.g. 2020-03-11 10:51:04 (timestamp=1583895064000L) in UTC+8
       * yields (1583596800000,1584201599999) => (2020-03-08 00:00:00.000, 2020-03-14 23:59:59.999)
       *
       * @param timestamp epoch milliseconds
       * @return (first millisecond of the week, last millisecond of the week), both inclusive
       */
      def getTimestampFromWeek(timestamp: Long): (Long, Long) = {
        val calendar = Calendar.getInstance()
        calendar.setTimeInMillis(timestamp)
        truncateToMidnight(calendar)
        // Step back to the most recent Sunday arithmetically; unlike set(DAY_OF_WEEK, SUNDAY)
        // this does not depend on the default locale's first day of the week.
        val daysSinceSunday = calendar.get(Calendar.DAY_OF_WEEK) - Calendar.SUNDAY
        calendar.add(Calendar.DAY_OF_MONTH, -daysSinceSunday)
        val start: Long = calendar.getTimeInMillis
        // Calendar arithmetic instead of 7 * 24h, so weeks containing a DST change are exact.
        calendar.add(Calendar.DAY_OF_MONTH, 7)
        (start, calendar.getTimeInMillis - 1)
      }

      // Resets the time-of-day fields of `calendar` to 00:00:00.000, keeping the date.
      private def truncateToMidnight(calendar: Calendar): Unit = {
        calendar.set(Calendar.HOUR_OF_DAY, 0)
        calendar.set(Calendar.MINUTE, 0)
        calendar.set(Calendar.SECOND, 0)
        calendar.set(Calendar.MILLISECOND, 0)
      }
    }
    
    
    /**
     * Input record of the stream.
     *
     * NOTE(review): field meanings are not defined in this file; presumably `timeStamp` is the
     * event time in epoch milliseconds and `weekTag` / `monthTag` identify the record's week and
     * month — confirm against the producer.
     */
    case class Top100Input(
      event_id: String,
      date_d: String,
      timeStamp: Long,
      uid: Long,
      weekTag: String,
      monthTag: String
    )
    
    // Usage sketch: key the stream, then apply the custom assigner.
    // `...` is a placeholder for however the input DataStream is obtained.
    val dStream: DataStream[Top100Input] = ...
    
    // One window per week, keyed by weekTag.
    dStream
          .keyBy(_.weekTag)
          .window(new CustomWindowAssigner[Top100Input]("week"))
    
    // One window per month, keyed by monthTag.
    dStream
          .keyBy(_.monthTag)
          .window(new CustomWindowAssigner[Top100Input]("month"))
  • 相关阅读:
    Linux文件系统
    Ant整合svnant(三)
    Linux文件系统基本结构和基本操作管理
    Ant生成javadoc(四)
    Linux系统常用命令
    Linux系统目录架构
    使用fdisk进行磁盘管理
    命令行BASH的基本操作
    python 获取项目的根路径
    游戏自动化测试思路
  • 原文地址:https://www.cnblogs.com/qiu-hua/p/14094418.html
Copyright © 2020-2023  润新知