• Flink DataStream Source (Part 2)


    Flink Source

    Execution environment
          import org.apache.flink.api.scala.ExecutionEnvironment
          val env = ExecutionEnvironment.getExecutionEnvironment // batch execution environment

          import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
          val streamenv = StreamExecutionEnvironment.getExecutionEnvironment // streaming execution environment
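
          Note: with the DataStream API the calls below only declare the pipeline; nothing actually runs until execute() is invoked once on the environment, typically after all sources and sinks are set up. A minimal sketch, with an assumed job name:
          streamenv.execute("source-demo")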
    Element, collection, text-file, and socket sources
          import org.apache.flink.streaming.api.scala.createTypeInformation
          streamenv.fromElements[String]("1","2","3","4","5").print()
          streamenv.fromCollection(Array("6","7","8","9","10")).print()
          streamenv.readTextFile("/data/qujian.csv").print()
          streamenv.socketTextStream("***.***.***.***",7777).print()
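
          readTextFile emits one String per line; a minimal sketch of splitting the CSV rows into fields (assuming the file is comma-separated):
          streamenv.readTextFile("/data/qujian.csv")
            .map(line => line.split(","))   // one Array[String] per row
            .map(fields => fields(0))       // e.g. keep only the first column
            .print()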
    Reading Parquet
          import org.apache.flink.formats.parquet.ParquetRowInputFormat
          import org.apache.flink.core.fs.Path
    //      long columns map to Parquet INT64 (schema dump of the target file below)
    //      |-- a: string (nullable = true)
    //      |-- b: long (nullable = true)
    //      |-- c: string (nullable = true)
    //      |-- d: string (nullable = true)
    //      |-- e: long (nullable = true)
    //      |-- f: long (nullable = true)
    //      |-- g: string (nullable = true)
    //      |-- h: long (nullable = true)
    //      |-- i: long (nullable = true)
    //      |-- j: integer (nullable = true)
          import org.apache.parquet.schema.{MessageType, PrimitiveType}
          import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
          import org.apache.parquet.schema.Type.Repetition
          val a = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "a")
          val b = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "b")
          val c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "c")
          val d = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "d")
          val e = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "e")
          val f = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "f")
          val g = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "g")
          val h = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "h")
          val i = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "i")
          val j = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "j")

          val schema = new MessageType("dataschema", a, b, c, d, e, f, g, h, i, j)
          streamenv.readFile(new ParquetRowInputFormat(
             new Path("/data/data.parquet"), schema),"/data/data.parquet").print()
             // not a mistake: the path really is written twice (once for the InputFormat, once for readFile)
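
          readFile with ParquetRowInputFormat produces org.apache.flink.types.Row records; a minimal sketch of pulling a typed field back out (the index and cast follow the schema defined above):
          import org.apache.flink.types.Row
          streamenv.readFile(new ParquetRowInputFormat(
             new Path("/data/data.parquet"), schema),"/data/data.parquet")
             .map((row: Row) => row.getField(1).asInstanceOf[Long])  // column b, INT64 per the schema
             .print()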
    Reading Kafka, approach 1 (KafkaSource)
        import org.apache.flink.connector.kafka.source.KafkaSource
        import org.apache.flink.api.common.serialization.SimpleStringSchema
        import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
        import org.apache.flink.api.common.eventtime.WatermarkStrategy
        import org.apache.flink.streaming.api.scala.createTypeInformation
        val source = KafkaSource.builder()
          .setBootstrapServers("127.0.0.1:9092")
          .setTopics("events")
          .setGroupId("group")
          .setStartingOffsets(OffsetsInitializer.earliest())
          .setValueOnlyDeserializer(new SimpleStringSchema())
          .build();
        streamenv.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source").print()
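
        OffsetsInitializer controls where consumption starts; besides earliest(), two other common choices (a sketch against the same builder as above):
        import org.apache.kafka.clients.consumer.OffsetResetStrategy
        //  .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST)) // resume the group's committed offsets, fall back to latest
        //  .setStartingOffsets(OffsetsInitializer.timestamp(1660000000000L))                    // start from a timestamp in ms (assumed example value)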
    Reading Kafka, approach 2 (legacy FlinkKafkaConsumer, deprecated in newer Flink releases in favor of KafkaSource above)
        import org.apache.flink.api.common.serialization.SimpleStringSchema
        import java.util.Properties
        import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
        val properties = new Properties()
        properties.setProperty("bootstrap.servers", "127.0.0.1:9092")
        properties.setProperty("auto.offset.reset", "earliest")
        properties.setProperty("group.id", "group")
        val kafkaConsumer = new FlinkKafkaConsumer[String]("events", new SimpleStringSchema(), properties)
        streamenv.addSource(kafkaConsumer).print()
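
        Before the consumer is passed to addSource, the start position can also be set on the consumer object itself (these setters live on FlinkKafkaConsumerBase):
        kafkaConsumer.setStartFromEarliest()    // ignore committed offsets and read from the beginning
        // kafkaConsumer.setStartFromLatest()   // or: only read records that arrive from now on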
    Reading JDBC, approach 1 (JdbcInputFormat)
        import org.apache.flink.connector.jdbc.JdbcInputFormat
        import org.apache.flink.api.common.typeinfo.BasicTypeInfo
        import org.apache.flink.api.java.typeutils.RowTypeInfo
        val jdbcInputFormat = JdbcInputFormat.buildJdbcInputFormat()
          .setDrivername("com.mysql.jdbc.Driver")
          .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?characterEncoding=UTF-8")
          .setUsername("root").setPassword("123456")
          .setQuery("SELECT id,name,score FROM student")
          .setRowTypeInfo(new RowTypeInfo(BasicTypeInfo.INT_TYPE_INFO, 
               BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO))
          .finish();
        streamenv.createInput(jdbcInputFormat).print()
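
        A note on the driver class: com.mysql.jdbc.Driver is the MySQL Connector/J 5.x name; with Connector/J 8.x it is a drop-in swap in the builder above:
          .setDrivername("com.mysql.cj.jdbc.Driver")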
    Reading JDBC, approach 2 (custom RichSourceFunction)
        import java.sql.{Connection, DriverManager, PreparedStatement}
        import org.apache.flink.configuration.Configuration
        import org.apache.flink.streaming.api.functions.source.RichSourceFunction
        import org.apache.flink.streaming.api.functions.source.SourceFunction
        class MyRichSourcejdbc  extends RichSourceFunction[(Int,String,Int)]{
          var conn:Connection = _
          var selectStatement:PreparedStatement = _
          override def open(parameters: Configuration): Unit = {
            conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/test","root","123456")
            selectStatement = conn.prepareStatement("SELECT id,name,score FROM student");
          }
          override def run(ctx: SourceFunction.SourceContext[(Int,String,Int)]): Unit = {
            val resultSet = selectStatement.executeQuery()
            while (resultSet.next()) {
              ctx.collect((resultSet.getInt(1), resultSet.getString(2), resultSet.getInt(3)))
            }
          }
          override def cancel(): Unit = {
            selectStatement.close()
            conn.close()
          }
        }
        streamenv.addSource(new MyRichSourcejdbc()).print() // custom source that reads from MySQL
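
        A possible refinement (a sketch, not in the original): cancel() is meant to signal a running source to stop, while close() runs on every shutdown path, so resource cleanup is safer there:
        // override def cancel(): Unit = {}    // nothing to interrupt: run() returns after a single query
        // override def close(): Unit = {
        //   if (selectStatement != null) selectStatement.close()
        //   if (conn != null) conn.close()
        // }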
    Reading Redis
          import org.apache.flink.configuration.Configuration
          import org.apache.flink.streaming.api.functions.source.{RichSourceFunction,SourceFunction}
          import redis.clients.jedis.{JedisPool,JedisPoolConfig,Protocol}
          class MyRedisSource extends RichSourceFunction[(String, String)] {
            var jedisPool:JedisPool = _
            override def open(parameters: Configuration): Unit = {
              jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1", 6379, Protocol.DEFAULT_TIMEOUT)
            }
            override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
              val jedis = jedisPool.getResource()
              ctx.collect(("test",jedis.get("test")))
              jedis.close() // implements the abstract run() method: loads the source data into the stream
            }
            override def cancel(): Unit = {
              jedisPool.close()
            }
          }
          import org.apache.flink.streaming.api.scala.createTypeInformation
          streamenv.addSource(new MyRedisSource()).print()
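
          The run() above emits a single pair and returns, so the stream ends immediately. A minimal sketch of the usual continuously polling variant (class name and one-second interval are assumed):
          class MyPollingRedisSource extends RichSourceFunction[(String, String)] {
            var jedisPool: JedisPool = _
            @volatile var running = true
            override def open(parameters: Configuration): Unit = {
              jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1", 6379, Protocol.DEFAULT_TIMEOUT)
            }
            override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
              while (running) {                        // keep polling until the job is cancelled
                val jedis = jedisPool.getResource()
                ctx.collect(("test", jedis.get("test")))
                jedis.close()
                Thread.sleep(1000)                     // assumed poll interval
              }
            }
            override def cancel(): Unit = { running = false }
            override def close(): Unit = jedisPool.close()
          }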
    Reading Redis asynchronously
          import org.apache.flink.configuration.Configuration
          import org.apache.flink.streaming.api.scala.async.{ResultFuture, RichAsyncFunction}
          import redis.clients.jedis.{JedisPool,JedisPoolConfig,Protocol}
          import org.apache.flink.streaming.api.scala.AsyncDataStream
          import java.util.concurrent.TimeUnit
          import scala.concurrent.{Future,ExecutionContext}
          // The core logic is the same, but with an async call you don't know when it will run - that is its biggest drawback:
          // Flink is about computing results in real time, and this trait fits Flink poorly
          class RedisAsyncFunction extends  RichAsyncFunction[String,String]{
            var jedisPool:JedisPool = _
            override def open(parameters: Configuration): Unit = {
              jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1", 6379, Protocol.DEFAULT_TIMEOUT)
            }
            override def asyncInvoke(input: String, resultFuture: ResultFuture[String]): Unit = {
              val jedis = jedisPool.getResource()
              val value = jedis.get(input) // read the key once instead of calling get() three times
              println(value)
              if (value != null) {
                resultFuture.complete(Array(value))
              } else {
                resultFuture.complete(Array("-99"))
              }
              jedis.close()
            }
            override def close(): Unit = {
              jedisPool.close()
            }
          }
          val stream = streamenv.fromElements[String]("test","test1","test2","test3","test4","test5")
          AsyncDataStream.unorderedWait(stream,new RedisAsyncFunction(), 10000, TimeUnit.MILLISECONDS, 100).print()
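
          Note that asyncInvoke above still blocks on the synchronous Jedis call, which defeats the purpose of the async operator (and is presumably why Future and ExecutionContext are imported). A sketch of the non-blocking pattern, with an assumed pool size and class name:
          import java.util.concurrent.Executors
          class RedisAsyncFunctionNonBlocking extends RichAsyncFunction[String, String] {
            var jedisPool: JedisPool = _
            implicit var executor: ExecutionContext = _
            override def open(parameters: Configuration): Unit = {
              jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1", 6379, Protocol.DEFAULT_TIMEOUT)
              executor = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(4)) // assumed thread count
            }
            override def asyncInvoke(input: String, resultFuture: ResultFuture[String]): Unit = {
              Future { // run the blocking lookup off the operator thread
                val jedis = jedisPool.getResource()
                val value = jedis.get(input)
                jedis.close()
                resultFuture.complete(Array(if (value != null) value else "-99"))
              }
            }
            override def close(): Unit = jedisPool.close()
          }
          AsyncDataStream.orderedWait(stream, new RedisAsyncFunctionNonBlocking(), 10000, TimeUnit.MILLISECONDS, 100).print() // orderedWait preserves input order at some latency cost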
  • Original article: https://www.cnblogs.com/wuxiaolong4/p/16744139.html