• flink第一个应用


    去年华为的大佬就已经在用 Flink 了,今天刚好有空,就跟着写了个 demo 玩一下(这里不用 Java,Spark 和 Flink 都还是用 Scala 来写)。

    package flink.test
    import org.apache.flink.api.java.utils.ParameterTool
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    import org.apache.flink.streaming.api.windowing.time.Time

    /**
      * Streaming word count over a socket text source.
      *
      * Reads newline-delimited text from host "rhel071", splits every line on
      * whitespace, and prints per-word counts computed over a 2-second window
      * that slides every second.
      */
    object StreamingWindowWordCount {

      /**
        * Entry point.
        *
        * @param args command-line arguments; `--port <n>` selects the socket port.
        *             When the argument is missing or invalid, port 9876 is used.
        */
      def main(args: Array[String]): Unit = {
        // Resolve the port; fall back to 9876 when it is not supplied.
        // For a local test, start a listener on the source host first,
        // e.g. `nc -l 9876`.
        val port: Int = try {
          ParameterTool.fromArgs(args).getInt("port")
        } catch {
          case _: Exception =>
            System.err.println("no port")
            9876
        }

        // Obtain the execution environment.
        val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

        // Connect to the socket source; one record per line of input.
        val text = env.socketTextStream("rhel071", port, '\n')

        // The implicit TypeInformation instances from this import are required
        // here; without them flatMap/map do not compile.
        import org.apache.flink.api.scala._

        // Tokenize, key by word, window, and sum the counts.
        val windowCount = text
          .flatMap(line => line.split("\\s"))
          .map(word => WordWithCount(word, 1L))
          .keyBy("word")
          .timeWindow(Time.seconds(2), Time.seconds(1))
          // Records sharing a key are merged into one running total. As with
          // Spark, these operators only describe the job; nothing runs until
          // env.execute is called below.
          .reduce((a, b) => WordWithCount(a.word, a.count + b.count))
          //.sum("count")

        // Print the results from a single thread.
        windowCount.print().setParallelism(1)
        env.execute("streaming word count")
      }

      /** A word paired with the number of times it has been seen. */
      case class WordWithCount(word: String, count: Long)
    }





    Maven 项目(依赖不适合自己手动去找 jar 包:本地测试需要的 jar 包数量实在太多,特别是项目牵扯到 Hadoop、HBase 的时候)
    下面是这个小例子用到的 pom.xml,记录一下:
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the Flink word-count demos. All Flink artifacts are pinned to version 1.4.2. -->
    <modelVersion>4.0.0</modelVersion>
    <groupId>finkDemo_20180918</groupId>
    <artifactId>finkDemo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- NOTE(review): the provided scope is left commented out on every dependency, presumably so the Flink jars are on the classpath for local runs; confirm before packaging for a cluster. -->
    <dependencies>
    <!-- Flink Java batch API -->
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.4.2</version>
    <!--<scope>provided</scope>-->
    </dependency>
    <!-- Flink Java streaming API (artifact built for Scala 2.11) -->
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>1.4.2</version>
    <!--<scope>provided</scope>-->
    </dependency>

    <!-- Flink Scala batch API (artifact built for Scala 2.11) -->
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.4.2</version>
    <!--<scope>provided</scope>-->
    </dependency>
    <!-- Flink Scala streaming API (artifact built for Scala 2.11) -->
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.4.2</version>
    <!--<scope>provided</scope>-->
    </dependency>
    </dependencies>
    </project>



    flink的离线操作

    package flink.test

    import org.apache.flink.api.scala.ExecutionEnvironment

    /**
      * Batch word count: reads text from a fixed input path, counts the
      * occurrences of each space-separated word, and writes the totals as
      * delimited text to a fixed output path.
      */
    object BatchWordCountScala {

      /**
        * Entry point.
        *
        * @param args command-line arguments (unused; both paths are hard-coded)
        */
      def main(args: Array[String]): Unit = {
        // Backslashes must be escaped inside a normal string literal; otherwise
        // sequences such as \f and \b turn into control characters and \d does
        // not compile at all.
        val inputPath: String = "D:\\flink\\batch\\file"
        val outputPath: String = "D:\\flink\\data\\result"

        val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
        val text = env.readTextFile(inputPath)

        // Brings the implicit TypeInformation instances needed by flatMap/map into scope.
        import org.apache.flink.api.scala._

        // Group on field 0 (word) and sum field 1 (count).
        val counts = text
          .flatMap(line => line.split(" "))
          .map(word => WordWithCount(word, 1L))
          .groupBy(0)
          .sum(1)

        // One "word count" pair per line: newline between rows, a space between fields.
        counts.writeAsCsv(outputPath, "\n", " ")
        env.execute("batch word count")
      }

      /** A word paired with the number of times it has been seen. */
      case class WordWithCount(word: String, count: Long)
    }

    读取输入目录下的文本文件,按单词统计出现次数后,把结果写入 result 目录。
    
    
  • 相关阅读:
    笔试:一个逻辑题
    jmeter,学这些重点就可以了
    性能测试:通过一个案例(等待锁超时)告诉你,性能到底要不要熟悉业务逻辑?
    源码解读:webdriver client的原理 (面试自动化:如果你认为知道18种定位方式就算会自动化,那就太low了)
    测试必备:jmeter测试http协议接口的各种传参方式
    Vue笔记:封装 axios 为插件使用
    Vue笔记:使用 axios 发送请求
    Tomcat笔记:Tomcat的执行流程解析
    Git笔记:Git介绍和常用命令汇总
    Spring Boot使用Shiro实现登录授权认证
  • 原文地址:https://www.cnblogs.com/yaohaitao/p/9674770.html
Copyright © 2020-2023  润新知