• Flink双流join在电商订单中的测试


    开发工作:

    package com.king.app
    
    import java.util.Objects
    
    import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
    import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.api.windowing.time.Time
    import org.apache.flink.util.Collector
    
    /**
     * @Author: KingWang
     * @Date: 2022/2/12
     * @Desc: Demo job that reads orders and order details from two local sockets,
     *        interval-joins them on the order id and prints the joined wide records.
     **/
    object FlinkDataStreamJoinTest {

      /** Order header: order id, order number, creation time. */
      case class Order(id: String, orderNum: String, ts: Long)

      /** Order line: detail id, order id, product code, quantity, price, creation time. */
      case class OrderDetail(id: String, orderId: String, productCode: String, qty: Int, price: Double, ts: Long)

      /** Joined wide record: order id, order number, detail id, product code, quantity, price, detail time. */
      case class OrderInfo(orderId: String, orderNum: String, orderDetailId: String, productCode: String, qty: Int, price: Double, ts: Long)

      /**
       * Parses one comma-separated line of the form `id,orderNum,ts` into an [[Order]].
       *
       * Fields beyond the third are ignored. Returns `None` instead of throwing when the
       * line has too few fields or the timestamp is not a valid long, so that a single bad
       * socket line cannot fail the whole job.
       *
       * @param line a non-null input line
       * @return the parsed order, or `None` if the line is malformed
       */
      private def parseOrder(line: String): Option[Order] =
        line.split(",") match {
          case Array(id, orderNum, ts, _*) =>
            // trim the numeric field so a stray space or trailing '\r' does not reject the line
            scala.util.Try(Order(id, orderNum, ts.trim.toLong)).toOption
          case _ => None
        }

      /**
       * Parses one comma-separated line of the form `id,orderId,productCode,qty,price,ts`
       * into an [[OrderDetail]].
       *
       * Fields beyond the sixth are ignored. Returns `None` instead of throwing when the
       * line has too few fields or a numeric field cannot be converted.
       *
       * @param line a non-null input line
       * @return the parsed order detail, or `None` if the line is malformed
       */
      private def parseOrderDetail(line: String): Option[OrderDetail] =
        line.split(",") match {
          case Array(id, orderId, productCode, qty, price, ts, _*) =>
            scala.util.Try(
              OrderDetail(id, orderId, productCode, qty.trim.toInt, price.trim.toDouble, ts.trim.toLong)
            ).toOption
          case _ => None
        }

      /**
       * Entry point: builds and runs the join job.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {

        // 1. Obtain the execution environment; parallelism 1 keeps the demo output in one task
        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)

        // 2. Read the two socket ports, parse each line, and assign timestamps / watermarks.
        //    `ts` is multiplied by 1000 to form the event timestamp (presumably seconds -> millis).
        val orderStream = env.socketTextStream("localhost", 7788)
          .filter(line => Objects.nonNull(line) && line.nonEmpty)
          // malformed lines yield an empty list and are dropped instead of crashing the job
          .flatMap((line: String) => parseOrder(line).toList)
          .assignTimestampsAndWatermarks(
            WatermarkStrategy.forMonotonousTimestamps[Order]()
              .withTimestampAssigner(new SerializableTimestampAssigner[Order] {
                override def extractTimestamp(element: Order, recordTimestamp: Long): Long = element.ts * 1000L
              })
          )

        val orderDetailStream = env.socketTextStream("localhost", 7789)
          .filter(line => Objects.nonNull(line) && line.nonEmpty)
          .flatMap((line: String) => parseOrderDetail(line).toList)
          .assignTimestampsAndWatermarks(
            WatermarkStrategy.forMonotonousTimestamps[OrderDetail]()
              .withTimestampAssigner(new SerializableTimestampAssigner[OrderDetail] {
                override def extractTimestamp(element: OrderDetail, recordTimestamp: Long): Long = element.ts * 1000L
              })
          )

        // 3. Interval join: pair each order with details of the same order id whose event
        //    time lies within 5 seconds before or after the order's event time.
        val finalStream = orderStream.keyBy(_.id).intervalJoin(orderDetailStream.keyBy(_.orderId))
          .between(Time.seconds(-5), Time.seconds(5))
    //      .lowerBoundExclusive()   // make the lower bound exclusive
    //      .upperBoundExclusive()   // make the upper bound exclusive
          .process(new ProcessJoinFunction[Order, OrderDetail, OrderInfo] {
            override def processElement(order: Order, detail: OrderDetail, ctx: ProcessJoinFunction[Order, OrderDetail, OrderInfo]#Context, out: Collector[OrderInfo]): Unit = {
              // the wide record carries the detail's timestamp, not the order's
              out.collect(OrderInfo(order.id, order.orderNum, detail.id, detail.productCode, detail.qty, detail.price, detail.ts))
            }
          })

        // 4. Print the joined records
        finalStream.print("order info >>>> ")

        // 5. Launch the job
        env.execute("joinStream")
      }

    }

    订单数据:

    1001,PO0001,1
    1002,PO0002,2

    订单明细数据:

    2001,1001,xiaomi,3,2000,1
    2002,1001,apple,2,5000,1
    2003,1002,Java程序设计,1,50,2
    2004,1002,Scala语言精通,1,60,3
    2005,1003,SumSundisk,2,8000,3

    通过将两个流式数据关联完成订单信息表的合并。

  • 相关阅读:
    Linux 全盘备份恢复工具(Clonezilla)
    Dremel琢美电磨机配件指南
    智能设备常用电机入门指南(实物+原理图)
    硬件入门 之 20种视频接口线
    【Spring源码分析】Bean加载流程概览
    面试题:Spring为什么默认bean为单例?
    @Autowired @Resource @Qualifier的区别
    Spring的接口InitializingBean、BeanPostProcessor以及注解@PostConstruct、bean的init-method的执行先后顺序
    Spring Bean的生命周期(非常详细)
    spring中BeanFactory和FactoryBean的区别
  • 原文地址:https://www.cnblogs.com/30go/p/15886547.html
Copyright © 2020-2023  润新知