• Flink batch WordCount


    1、POJO方式

    public class WordCountPojo {
        public static class Word{
            private String word;
            private int frequency;
    
            public Word() {
            }
    
            public Word(String word, int frequency) {
                this.word = word;
                this.frequency = frequency;
            }
    
            public String getWord() {
                return word;
            }
    
            public void setWord(String word) {
                this.word = word;
            }
    
            public int getFrequency() {
                return frequency;
            }
    
            public void setFrequency(int frequency) {
                this.frequency = frequency;
            }
    
            @Override
            public String toString() {
                return "Word=" + word + " freq=" + frequency;
            }
        }
    
        /**
         * Implements the string tokenizer that splits sentences into words as a user-defined
         * FlatMapFunction. The function takes a line (String) and splits it into
         * multiple Word objects.
         */
        public static final class Tokenizer implements FlatMapFunction<String, Word> {
    
            @Override
            public void flatMap(String value, Collector<Word> out) {
                // normalize and split the line
                String[] tokens = value.toLowerCase().split("\W+");
    
                // emit the pairs
                for (String token : tokens) {
                    if (token.length() > 0) {
                        out.collect(new Word(token, 1));
                    }
                }
            }
        }
    
        public static void main(String args[]) throws Exception {
            final ParameterTool params = ParameterTool.fromArgs(args);
    
            // set up the execution environment
            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
            // make parameters available in the web interface
            env.getConfig().setGlobalJobParameters(params);
    
            // get input data
            DataSet<String> text;
            if (params.has("input")) {
                // read the text file from given input path
                text = env.readTextFile(params.get("input"));
            } else {
                // get default test text data
                System.out.println("Executing WordCount example with default input data set.");
                System.out.println("Use --input to specify file input.");
                text = WordCountData.getDefaultTextLineDataSet(env);
            }
    
            DataSet<Word> counts = text
                    // split up the lines into Word objects (with frequency = 1)
                    .flatMap(new Tokenizer())
                    // group by the field word and sum up the frequency
                    .groupBy("word")
                    .reduce(new ReduceFunction<Word>() {
                        @Override
                        public Word reduce(Word value1, Word value2) throws Exception {
                            return new Word(value1.word, value1.frequency + value2.frequency);
                        }
                    });
            if (params.has("output")) {
                counts.writeAsText(params.get("output"), FileSystem.WriteMode.OVERWRITE);
                // execute program
                env.execute("WordCount-Pojo Example");
            } else {
                System.out.println("Printing result to stdout. Use --output to specify output path.");
                counts.print();
            }
        }
    
    
    }

    2、元组方式

    public class WordCount {
    
        /**
         * Implements the string tokenizer that splits sentences into words as a user-defined
         * FlatMapFunction. The function takes a line (String) and splits it into
         * multiple pairs in the form of "(word,1)" ({@code Tuple2<String, Integer>}).
         */
        public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                // normalize and split the line
                String[] tokens = value.toLowerCase().split("\W+");
    
                // emit the pairs
                for (String token : tokens) {
                    if (token.length() > 0) {
                        out.collect(new Tuple2<>(token, 1));
                    }
                }
            }
        }
    
        public static void main(String args[]) throws Exception {
            final ParameterTool params = ParameterTool.fromArgs(args);
    
            // set up the execution environment
            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
            // make parameters available in the web interface
            env.getConfig().setGlobalJobParameters(params);
    
            // get input data
            DataSet<String> text;
            if (params.has("input")) {
                // read the text file from given input path
                text = env.readTextFile(params.get("input"));
            } else {
                // get default test text data
                System.out.println("Executing WordCount example with default input data set.");
                System.out.println("Use --input to specify file input.");
                text = WordCountData.getDefaultTextLineDataSet(env);
            }
    
            DataSet<Tuple2<String,Integer>> counts = text
                    // split up the lines in pairs (2-tuples) containing: (word,1)
                    .flatMap(new Tokenizer())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .groupBy(0)
                    .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                        @Override
                        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                            return new Tuple2<>(value1.f0,value1.f1+value2.f1);
                        }
                    }); //等效于sum(1)
    //                .sum(1);
            // emit result
            if(params.has("output")){
                counts.writeAsCsv(params.get("output"),"
    "," ");
                // execute program
                env.execute("WordCount batch");
            }else {
                System.out.println("Printing result to stdout. Use --output to specify output path.");
                counts.print();
            }
    
        }
    }
  • 相关阅读:
    Hibernate查询基本语句 全新时代
    word表格设置背景色方法 全新时代
    Html网页背景渐变色代码 全新时代
    FlashFXP列表参数错误解决方法 全新时代
    svn导出功能不包含.svn文件 全新时代
    JDBC连接SQL Server测试代码及异常 全新时代
    javascript:滚动新闻
    C# 时间函数(几个常用时间,程序运行计时,页面运行计时)
    C#:当把U盘放插入,然后程序自动将U盘的内容复制到本地硬盘
    C#:转换成中文数字
  • 原文地址:https://www.cnblogs.com/asker009/p/10952588.html
Copyright © 2020-2023  润新知