1、POJO方式
public class WordCountPojo { public static class Word{ private String word; private int frequency; public Word() { } public Word(String word, int frequency) { this.word = word; this.frequency = frequency; } public String getWord() { return word; } public void setWord(String word) { this.word = word; } public int getFrequency() { return frequency; } public void setFrequency(int frequency) { this.frequency = frequency; } @Override public String toString() { return "Word=" + word + " freq=" + frequency; } } /** * Implements the string tokenizer that splits sentences into words as a user-defined * FlatMapFunction. The function takes a line (String) and splits it into * multiple Word objects. */ public static final class Tokenizer implements FlatMapFunction<String, Word> { @Override public void flatMap(String value, Collector<Word> out) { // normalize and split the line String[] tokens = value.toLowerCase().split("\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Word(token, 1)); } } } } public static void main(String args[]) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataSet<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { // get default test text data System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); text = WordCountData.getDefaultTextLineDataSet(env); } DataSet<Word> counts = text // split up the lines into Word objects (with frequency = 1) .flatMap(new Tokenizer()) // group by the field word and sum up the frequency .groupBy("word") .reduce(new ReduceFunction<Word>() { @Override public Word reduce(Word value1, Word value2) throws Exception { return new Word(value1.word, value1.frequency + value2.frequency); } }); if (params.has("output")) { counts.writeAsText(params.get("output"), FileSystem.WriteMode.OVERWRITE); // execute program env.execute("WordCount-Pojo Example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } } }
2、元组方式
public class WordCount { /** * Implements the string tokenizer that splits sentences into words as a user-defined * FlatMapFunction. The function takes a line (String) and splits it into * multiple pairs in the form of "(word,1)" ({@code Tuple2<String, Integer>}). */ public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> { @Override public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception { // normalize and split the line String[] tokens = value.toLowerCase().split("\W+"); // emit the pairs for (String token : tokens) { if (token.length() > 0) { out.collect(new Tuple2<>(token, 1)); } } } } public static void main(String args[]) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataSet<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { // get default test text data System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); text = WordCountData.getDefaultTextLineDataSet(env); } DataSet<Tuple2<String,Integer>> counts = text // split up the lines in pairs (2-tuples) containing: (word,1) .flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .reduce(new ReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return new Tuple2<>(value1.f0,value1.f1+value2.f1); } }); //等效于sum(1) // .sum(1); // emit result if(params.has("output")){ counts.writeAsCsv(params.get("output")," "," "); // execute program env.execute("WordCount batch"); }else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } } }