• Flink Sink (File, Kafka, Redis, ES, JDBC)


    Flink has no equivalent of Spark's foreach method that lets users iterate over the data and write out side effects directly. All output to external systems goes through a Sink, and a job's final output step is wired up roughly like this:
    stream.addSink(new MySink(xxxx))
    Flink ships with sinks for a number of common frameworks; for anything else, a user-defined sink has to be implemented.
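    A user-defined sink only needs to implement SinkFunction, usually by extending RichSinkFunction to also get the open/close lifecycle hooks. A minimal sketch, assuming a placeholder MySink that simply prints each record with a prefix (the class name and constructor argument are illustrative, not part of the examples below):

    import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}

    // Minimal sketch of a user-defined sink: print every record with a prefix.
    class MySink(prefix: String) extends RichSinkFunction[String] {

      // invoke is called once for every element of the stream
      override def invoke(value: String, context: SinkFunction.Context): Unit = {
        println(prefix + value)
      }
    }

    // usage: stream.addSink(new MySink("out> "))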
     

    5.0 File
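    All of the examples below parse lines of sensor.txt into a SensorReading case class that is not shown in this post. Assuming the usual definition from com.zhen.flink.api and input lines of the form sensor_1,1547718199,35.8 (both assumptions, not taken from the post), it looks roughly like this:

    // Assumed definition of the case class used throughout the examples
    case class SensorReading(id: String, timestamp: Long, temperature: Double)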

    package com.zhen.flink.api.sink
    
    import com.zhen.flink.api.SensorReading
    import org.apache.flink.api.common.serialization.SimpleStringEncoder
    import org.apache.flink.core.fs.Path
    import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.api.scala._
    
    
    /**
      * @Author FengZhen
      * @Date 6/8/22 10:43 PM
      * @Description TODO
      */
    object FileSink {
    
      def main(args: Array[String]): Unit = {
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        env.setParallelism(1)
    
        // 0. Read the input data
        val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
        val inputStream = env.readTextFile(filePath)
    
        // 1. Map each line to the SensorReading case class
        val dataStream: DataStream[SensorReading] = inputStream
          .map(
            data => {
              val arr = data.split(",")
              SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
            }
          )
    
        dataStream.print()
        val outFilePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor_out.txt"
        dataStream.writeAsCsv(outFilePath)
    
        val outFilePath1 = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor_out_1.txt"
        dataStream.addSink(
          StreamingFileSink.forRowFormat(
            new Path(outFilePath1),
            new SimpleStringEncoder[SensorReading]()
          ).build()
        )
    
        env.execute("file sink.")
      }
    
    }
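    Unlike writeAsCsv, StreamingFileSink treats the given path as a directory and writes rolling part files into it. When files roll can be tuned with a rolling policy; the following sketch would replace the addSink(StreamingFileSink...) call above (the thresholds are arbitrary examples, not values from the original post):

    import java.util.concurrent.TimeUnit
    import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

    val rollingFileSink = StreamingFileSink
      .forRowFormat(new Path(outFilePath1), new SimpleStringEncoder[SensorReading]())
      .withRollingPolicy(
        DefaultRollingPolicy.builder()
          .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))  // roll at least every 15 minutes
          .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) // roll after 5 minutes without new data
          .withMaxPartSize(128 * 1024 * 1024)                   // roll once a part file reaches 128 MB
          .build()
      )
      .build()
    dataStream.addSink(rollingFileSink)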

    5.1 Kafka

    package com.zhen.flink.api.sink
    
    import java.util.Properties
    
    import com.zhen.flink.api.SensorReading
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}
    
    /**
      * @Author FengZhen
      * @Date 6/11/22 3:20 PM
      * @Description TODO
      */
    object KafkaSink {
    
      def main(args: Array[String]): Unit = {
    
    
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        env.setParallelism(1)
    
    
    
        // Read data from Kafka
        val properties = new Properties()
        properties.setProperty("bootstrap.servers", "localhost:9092")
        properties.setProperty("group.id", "consumer-group")
        properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
        properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
        properties.setProperty("auto.offset.reset", "latest")
    
        val streamKafka = env.addSource( new FlinkKafkaConsumer[String](
          "topic_sensor",
          new SimpleStringSchema(),
          properties
        ))
    
        // 1. Parse each line into a SensorReading, then back to a string for the sink
        val dataStream: DataStream[String] = streamKafka
          .map(
            data => {
              val arr = data.split(",")
              SensorReading(arr(0), arr(1).toLong, arr(2).toDouble).toString
            }
          )
    
        dataStream.addSink(
          new FlinkKafkaProducer[String]("localhost:9092", "topic_flink_kafka_sink", new SimpleStringSchema())
        )
    
        //./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic topic_sensor
    
        // ./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic topic_flink_kafka_sink
    
        env.execute("kafka sink.")
    
    
      }
    
    }
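    The three-argument FlinkKafkaProducer constructor used above (broker list, topic, schema) is a convenience variant with at-least-once delivery. Producer settings can also be passed explicitly through a Properties object; the sketch below would replace the addSink call above (property values are illustrative). For exactly-once delivery the connector additionally requires a KafkaSerializationSchema together with FlinkKafkaProducer.Semantic.EXACTLY_ONCE.

    val producerProps = new Properties()
    producerProps.setProperty("bootstrap.servers", "localhost:9092")

    dataStream.addSink(
      new FlinkKafkaProducer[String](
        "topic_flink_kafka_sink",   // target topic
        new SimpleStringSchema(),   // serializes each String record
        producerProps
      )
    )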
     

    5.2 Redis

    package com.zhen.flink.api.sink
    
    import com.zhen.flink.api.SensorReading
    import org.apache.flink.api.common.serialization.SimpleStringEncoder
    import org.apache.flink.core.fs.Path
    import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.redis.RedisSink
    import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
    import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
    
    /**
      * @Author FengZhen
      * @Date 6/12/22 8:23 PM
      * @Description TODO
      */
    object RedisSink {
    
    
      def main(args: Array[String]): Unit = {
    
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        env.setParallelism(1)
    
        // 0. Read the input data
        val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
        val inputStream = env.readTextFile(filePath)
    
        // 1. Map each line to the SensorReading case class
        val dataStream: DataStream[SensorReading] = inputStream
          .map(
            data => {
              val arr = data.split(",")
              SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
            }
          )
    
        // Define the Jedis connection pool config (a FlinkJedisConfigBase)
        val conf = new FlinkJedisPoolConfig.Builder()
            .setHost("localhost")
            .setPort(6379)
            .setDatabase(1)
            .build()
    
        dataStream.addSink( new RedisSink[SensorReading](conf, new MyRedisMapper))
    
        env.execute("redis sink.")
    
      }
    
      // Define a RedisMapper that tells the sink how to write each record
      class MyRedisMapper extends RedisMapper[SensorReading]{
    
        // The Redis command used to store the data: HSET <hash name> <field> <value>
        override def getCommandDescription: RedisCommandDescription = {
          new RedisCommandDescription(RedisCommand.HSET, "sensor_temp")
        }
    
    
        // Use the sensor id as the field (key) inside the hash
        override def getKeyFromData(t: SensorReading): String =
          t.id
    
        // Use the temperature as the value
        override def getValueFromData(t: SensorReading): String =
          t.temperature.toString
      }
    
    }
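    With HSET, the second argument of RedisCommandDescription is the name of the hash (the "additional key"), and getKeyFromData / getValueFromData supply the field and value inside that hash; since the pool config selects database 1, reads in redis-cli need a SELECT 1 first. Commands such as SET take no additional key. A sketch of a plain SET variant that could be passed to the RedisSink instead of MyRedisMapper (the key prefix is illustrative):

    // Sketch: store each reading as a plain string key/value instead of a hash field
    class MyRedisStringMapper extends RedisMapper[SensorReading] {

      // SET needs no additional key, so only the command is described
      override def getCommandDescription: RedisCommandDescription =
        new RedisCommandDescription(RedisCommand.SET)

      // Redis key, e.g. "sensor_temp:sensor_1"
      override def getKeyFromData(t: SensorReading): String =
        "sensor_temp:" + t.id

      // Temperature as the stored value
      override def getValueFromData(t: SensorReading): String =
        t.temperature.toString
    }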
     

    5.3 Elasticsearch

    package com.zhen.flink.api.sink
    
    import java.util
    
    import com.zhen.flink.api.SensorReading
    import org.apache.flink.api.common.functions.RuntimeContext
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.api.scala._
    import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
    import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
    import org.apache.http.HttpHost
    import org.elasticsearch.client.Requests
    
    /**
      * @Author FengZhen
      * @Date 6/17/22 3:39 PM
      * @Description TODO
      */
    object ElasticsearchSinkTest {
    
      def main(args: Array[String]): Unit = {
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        env.setParallelism(1)
    
        // 0. Read the input data
        val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
        val inputStream = env.readTextFile(filePath)
    
        // 1. Map each line to the SensorReading case class
        val dataStream: DataStream[SensorReading] = inputStream
          .map(
            data => {
              val arr = data.split(",")
              SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
            }
          )
    
    
        // Define the Elasticsearch HTTP hosts
        val httpHosts = new util.ArrayList[HttpHost]()
        httpHosts.add(new HttpHost("localhost", 9200))
    
        // Custom ElasticsearchSinkFunction that turns each record into an index request
        val myEsSinkFunc = new ElasticsearchSinkFunction[SensorReading] {
          override def process(element: SensorReading, ctx: RuntimeContext, indexer: RequestIndexer): Unit = {
    
            // Wrap the fields in a map to use as the document source
            val dataSource = new util.HashMap[String, String]()
            dataSource.put("id", element.id)
            dataSource.put("temperature", element.temperature.toString)
            dataSource.put("ts", element.timestamp.toString)
    
            // Create the index request that will be sent to Elasticsearch
            val indexRequest = Requests.indexRequest()
              .index("sensor")
              .`type`("reading_data")
              .source(dataSource)
    
            // Hand the request to the indexer
            indexer.add(indexRequest)
    
          }
        }
    
        dataStream.addSink(
          new ElasticsearchSink.Builder[SensorReading](httpHosts, myEsSinkFunc)
            .build()
        )
        env.execute("elasticsearch sink.")
      }
    }
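    By default the Elasticsearch sink buffers index requests and flushes them in bulk, so with a small input file nothing may appear in the index until the job finishes. The builder exposes bulk-flush settings; a sketch that flushes after every record, replacing the addSink call above (suitable for testing only):

    val esSinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts, myEsSinkFunc)
    // Flush every single request immediately instead of batching (testing only)
    esSinkBuilder.setBulkFlushMaxActions(1)
    dataStream.addSink(esSinkBuilder.build())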
     

    5.4 JDBC自定义sink

    package com.zhen.flink.api.sink
    
    import java.sql.{Connection, DriverManager, PreparedStatement}
    
    import com.zhen.flink.api.SensorReading
    import org.apache.flink.configuration.Configuration
    import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    import org.apache.flink.streaming.api.scala._
    
    
    /**
      * @Author FengZhen
      * @Date 7/1/22 2:21 PM
      * @Description TODO
      */
    object JdbcSink {
    
      def main(args: Array[String]): Unit = {
    
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        env.setParallelism(1)
    
        // 0. Read the input data
        val filePath = "/Users/FengZhen/Desktop/accumulate/0_project/flink_learn/src/main/resources/data/sensor.txt"
        val inputStream = env.readTextFile(filePath)
    
        // 1. Map each line to the SensorReading case class
        val dataStream: DataStream[SensorReading] = inputStream
          .map(
            data => {
              val arr = data.split(",")
              SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
            }
          )
    
        dataStream.addSink(new MyJdbcSinkFunc())
    
        env.execute("jdbc sink")
    
      }
    
      class MyJdbcSinkFunc() extends RichSinkFunction[SensorReading]{
    
        // JDBC connection and prepared statements
        var conn: Connection = _
        var insertStmt: PreparedStatement = _
        var updateStmt: PreparedStatement = _
    
    
        override def open(parameters: Configuration): Unit = {
          conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "1234qwer")
          insertStmt = conn.prepareStatement("insert into sensor_temp (id, temp) values (?,?)")
          updateStmt = conn.prepareStatement("update sensor_temp set temp = ? where id = ?")
        }
    
        override def invoke(value: SensorReading, context: SinkFunction.Context): Unit = {
    
          // Try the update first; if the id already exists, the row is updated
          updateStmt.setDouble(1, value.temperature)
          updateStmt.setString(2, value.id)
          updateStmt.execute()
    
          // If the update matched no rows, insert a new one
          if(updateStmt.getUpdateCount == 0){
            insertStmt.setString(1, value.id)
            insertStmt.setDouble(2, value.temperature)
            insertStmt.execute()
          }
        }
    
        override def close(): Unit = {
          insertStmt.close()
          updateStmt.close()
          conn.close()
        }
      }
    
    }
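    The update-then-insert logic above needs two prepared statements and, in the worst case, two round trips per record. If sensor_temp has a primary key or unique index on id (an assumption; the post does not show the table DDL), MySQL can do the same upsert in one statement with INSERT ... ON DUPLICATE KEY UPDATE. A sketch using the same imports as above:

    class MyJdbcUpsertSinkFunc() extends RichSinkFunction[SensorReading] {

      var conn: Connection = _
      var upsertStmt: PreparedStatement = _

      override def open(parameters: Configuration): Unit = {
        conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "1234qwer")
        // Requires a primary key / unique index on sensor_temp.id
        upsertStmt = conn.prepareStatement(
          "insert into sensor_temp (id, temp) values (?, ?) on duplicate key update temp = ?")
      }

      override def invoke(value: SensorReading, context: SinkFunction.Context): Unit = {
        upsertStmt.setString(1, value.id)
        upsertStmt.setDouble(2, value.temperature)
        upsertStmt.setDouble(3, value.temperature)
        upsertStmt.execute()
      }

      override def close(): Unit = {
        upsertStmt.close()
        conn.close()
      }
    }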

    pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.zhen.flink</groupId>
        <artifactId>flink_learn</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <name>flink_learn Maven</name>
    
    
        <properties>
            <scala_version>2.12</scala_version>
            <flink_version>1.13.1</flink_version>
        </properties>
    
        <dependencies>
    
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-clients_${scala_version}</artifactId>
                <version>${flink_version}</version>
            </dependency>
    
    
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-scala_${scala_version}</artifactId>
                <version>${flink_version}</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-scala_${scala_version}</artifactId>
                <version>${flink_version}</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-kafka_${scala_version}</artifactId>
                <version>${flink_version}</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-redis -->
            <dependency>
                <groupId>org.apache.bahir</groupId>
                <artifactId>flink-connector-redis_2.11</artifactId>
                <version>1.0</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-elasticsearch6_${scala_version}</artifactId>
                <version>${flink_version}</version>
            </dependency>
    
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.44</version>
            </dependency>
    
        </dependencies>
    
        <build>
        <plugins> <!-- This plugin compiles the Scala sources into class files -->
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.4.6</version>
                    <executions>
                    <execution> <!-- Bind the goal to Maven's compile phase -->
                            <goals>
                                <goal>compile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <version>3.0.0</version>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    
    
    </project>
     
     
     
  • Original post: https://www.cnblogs.com/EnzoDin/p/16434645.html