• flink 读取 CSV 文件,并将 DataStream 转 Table 对象


    package com.myflink
    
    import java.lang.reflect.Field
    import java.util
    
    import org.apache.flink.api.common.typeinfo._
    import org.apache.flink.api.java.io.{PojoCsvInputFormat, RowCsvInputFormat}
    import org.apache.flink.api.java.typeutils.{PojoField, PojoTypeInfo}
    import org.apache.flink.core.fs.Path
    import org.apache.flink.streaming.api.TimeCharacteristic
    import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
    import org.apache.flink.streaming.api.windowing.time.Time
    import org.apache.flink.table.api.Tumble
    import org.apache.flink.table.api.scala._
    import org.apache.flink.types.Row
    
    object Main {

      /**
       * Reads sensor records from a CSV file as a POJO `DataStream[SenSor]`,
       * converts the stream to a Table with an event-time (rowtime) attribute,
       * and prints a 10-second tumbling-window aggregation per sensor id.
       */
      def main(args: Array[String]): Unit = {
        import org.apache.flink.streaming.api.scala._

        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

        // Build the PojoTypeInfo manually from the case-class fields.
        // NOTE(review): PojoTypeInfo re-sorts its fields by name, and
        // getDeclaredFields gives no guaranteed order either — the effective
        // field order must match the CSV column order or parsing fails
        // (this is exactly the pitfall described below the code).
        val senSor = SenSor("a", 0L, 0.2)
        val pojoFields = new util.ArrayList[PojoField]()
        for (field <- senSor.getClass.getDeclaredFields) {
          pojoFields.add(new PojoField(field, TypeInformation.of(field.getType)))
        }

        // BUG FIX: the original literal "D:\allspace\...\test.csv" used raw
        // backslashes — in a Scala string literal these are escape sequences
        // ("\a" is not even a legal escape, so it fails to compile, and "\f",
        // "\r", "\t" would silently become control characters). Forward
        // slashes are accepted by the Windows JVM and are portable.
        val path = new Path("D:/allspace/flink0906/src/main/resources/input/test.csv")

        // Event timestamps come from the `timestamp` field (seconds -> millis),
        // with a 10-second bounded out-of-orderness watermark.
        val ds: DataStream[SenSor] = env
          .createInput(new PojoCsvInputFormat(path, new PojoTypeInfo(classOf[SenSor], pojoFields)))
          .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[SenSor](Time.seconds(10)) {
            override def extractTimestamp(element: SenSor): Long = element.timestamp * 1000
          })

        val tableEnv = StreamTableEnvironment.create(env)
        // Expose `timestamp` as the rowtime attribute so it can drive
        // event-time windowing in the Table API.
        val table = tableEnv.fromDataStream(ds, 'id, 'timestamp.rowtime, 'uempearture)
        table.window(Tumble over 10.seconds on 'timestamp as 'tw)
          .groupBy('id, 'tw)
          .select('id, 'id.count, 'uempearture.avg, 'tw.end)
          .toRetractStream[Row].print()

        env.execute()
      }

    }
    
    case class SenSor(var id: String, var timestamp: Long, var uempearture: Double) {};
    

  这里面的坑:

    SenSor 对象的属性,在构造 PojoTypeInfo 时按照名字重排序,这直接造成对 csv 文件解析出错。

    csv文件内容:
    sensor_1,1547718199,35.8
    sensor_6,1547718201,15.4

    flink版本为 1.10.1




  • 相关阅读:
    批处理处理oracle数据库脚本导入
    Log4Net 配置说明
    MSSQL和oracle数据通讯
    计算字符串中每种字符出现的次数[Dictionary<char,int>泛型集合用法]
    newagg新蛋笔试题
    Oracle数据库切换SQLServer数据库解决方案
    sqlserver 的事务和c#的事务
    SQL Server 2005 导出 数据脚本
    ASP.NET CMD WebShell
    Silverlight BUG
  • 原文地址:https://www.cnblogs.com/wudeyun/p/13688745.html
Copyright © 2020-2023  润新知