开发工作:
package com.king.app

import java.util.Objects

import scala.util.Try

import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector

/**
 * Demo job that joins an order stream with an order-detail stream using a
 * Flink interval join and prints the resulting wide records.
 *
 * Orders are read as comma-separated text from `localhost:7788` and order
 * details from `localhost:7789`. Blank lines are ignored; lines that cannot be
 * parsed are reported on stderr and skipped instead of failing the job.
 *
 * Author: KingWang
 * Date: 2022/2/12
 */
object FlinkDataStreamJoinTest {

  /** Order: order id, order number, creation time (in seconds, see the timestamp assigner). */
  case class Order(id: String, orderNum: String, ts: Long)

  /** Order detail: detail id, order id, product code, quantity, price, creation time (seconds). */
  case class OrderDetail(id: String, orderId: String, productCode: String, qty: Int, price: Double, ts: Long)

  /** Joined wide record: order id, order number, detail id, product code, quantity, price, detail time. */
  case class OrderInfo(orderId: String, orderNum: String, orderDetailId: String, productCode: String, qty: Int, price: Double, ts: Long)

  /** Minimum number of comma-separated fields in an order line. */
  private val OrderFieldCount = 3

  /** Minimum number of comma-separated fields in an order-detail line. */
  private val OrderDetailFieldCount = 6

  def main(args: Array[String]): Unit = {
    // 1. Obtain the execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // 2. Read both socket streams, parse them, and assign event-time timestamps/watermarks.
    val orderStream = env
      .socketTextStream("localhost", 7788)
      .filter(line => Objects.nonNull(line) && line.trim.nonEmpty)
      // flatMap rather than map so that an unparseable line yields no element
      // instead of throwing and taking the whole job down.
      .flatMap(line => parseOrder(line).toList)
      .assignTimestampsAndWatermarks(
        WatermarkStrategy
          .forMonotonousTimestamps[Order]()
          .withTimestampAssigner(new SerializableTimestampAssigner[Order] {
            // ts is in seconds; Flink event-time timestamps are in milliseconds.
            override def extractTimestamp(element: Order, recordTimestamp: Long): Long =
              element.ts * 1000L
          })
      )

    val orderDetailStream = env
      .socketTextStream("localhost", 7789)
      .filter(line => Objects.nonNull(line) && line.trim.nonEmpty)
      .flatMap(line => parseOrderDetail(line).toList)
      .assignTimestampsAndWatermarks(
        WatermarkStrategy
          .forMonotonousTimestamps[OrderDetail]()
          .withTimestampAssigner(new SerializableTimestampAssigner[OrderDetail] {
            // ts is in seconds; Flink event-time timestamps are in milliseconds.
            override def extractTimestamp(element: OrderDetail, recordTimestamp: Long): Long =
              element.ts * 1000L
          })
      )

    // 3. Interval join of the two keyed streams: a detail is paired with an order
    //    that has the same order id when the detail's timestamp lies within
    //    5 seconds before or after the order's timestamp.
    val finalStream = orderStream
      .keyBy(_.id)
      .intervalJoin(orderDetailStream.keyBy(_.orderId))
      .between(Time.seconds(-5), Time.seconds(5))
      // .lowerBoundExclusive()  makes the lower bound exclusive
      // .upperBoundExclusive()  makes the upper bound exclusive
      .process(new ProcessJoinFunction[Order, OrderDetail, OrderInfo] {
        override def processElement(
            order: Order,
            detail: OrderDetail,
            ctx: ProcessJoinFunction[Order, OrderDetail, OrderInfo]#Context,
            out: Collector[OrderInfo]): Unit = {
          val orderInfo = OrderInfo(
            order.id,
            order.orderNum,
            detail.id,
            detail.productCode,
            detail.qty,
            detail.price,
            detail.ts)
          out.collect(orderInfo)
        }
      })

    // 4. Print the joined records.
    finalStream.print("order info >>>> ")

    // 5. Start the job.
    env.execute("joinStream")
  }

  /**
   * Parses one `id,orderNum,ts` line.
   *
   * @param line raw text line read from the order socket
   * @return the parsed order, or `None` (after reporting on stderr) when the
   *         line has too few fields or a non-numeric timestamp
   */
  private def parseOrder(line: String): Option[Order] = {
    val parsed = splitFields(line, OrderFieldCount).flatMap { f =>
      Try(Order(f(0), f(1), f(2).toLong)).toOption
    }
    if (parsed.isEmpty) logSkipped("order", line)
    parsed
  }

  /**
   * Parses one `id,orderId,productCode,qty,price,ts` line.
   *
   * @param line raw text line read from the order-detail socket
   * @return the parsed detail, or `None` (after reporting on stderr) when the
   *         line has too few fields or a non-numeric qty/price/timestamp
   */
  private def parseOrderDetail(line: String): Option[OrderDetail] = {
    val parsed = splitFields(line, OrderDetailFieldCount).flatMap { f =>
      Try(OrderDetail(f(0), f(1), f(2), f(3).toInt, f(4).toDouble, f(5).toLong)).toOption
    }
    if (parsed.isEmpty) logSkipped("order detail", line)
    parsed
  }

  /**
   * Splits a comma-separated line into trimmed fields.
   *
   * Extra trailing fields are tolerated (the callers only read the leading
   * ones), matching the behavior of the original index-based parsing.
   *
   * @param line     raw text line; may be null
   * @param expected minimum number of fields required
   * @return the trimmed fields, or `None` when the line is null or too short
   */
  private def splitFields(line: String, expected: Int): Option[Array[String]] =
    Option(line)
      .map(_.trim.split(",").map(_.trim))
      .filter(_.length >= expected)

  /** Reports a record that was dropped because it could not be parsed. */
  private def logSkipped(kind: String, line: String): Unit =
    System.err.println(s"Skipping malformed $kind record: $line")
}
订单数据:
1001,PO0001,1
1002,PO0002,2
订单明细数据:
2001,1001,xiaomi,3,2000,1
2002,1001,apple,2,5000,1
2003,1002,Java程序设计,1,50,2
2004,1002,Scala语言精通,1,60,3
2005,1003,SumSundisk,2,8000,3
通过 interval join 将订单流与订单明细流按订单 Id 关联(明细时间与订单时间相差不超过 5 秒),完成订单信息宽表的合并。其中明细 2005 对应的订单 1003 不存在,因此不会产生输出。