• Spark Java版本wordCount


    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.*;
    import scala.Tuple2;
    
    import java.net.URL;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;
    
    /**
     * Word-count example written against the Spark Java API.
     *
     * <p>Reads the classpath resource {@code /wc.txt}, splits every line into
     * whitespace-separated words, counts the occurrences of each word and prints
     * the result to standard output, ordered by word in descending order.
     */
    public class wordcount{

        /**
         * Runs the word-count job on a local Spark master using all available cores.
         *
         * @param args command-line arguments (unused)
         * @throws IllegalStateException if {@code /wc.txt} is not on the classpath
         */
        public static void main(String[] args) {

            // Resolve the input before starting Spark so a missing resource fails
            // fast with a clear message instead of a NullPointerException.
            URL url = wordcount.class.getResource("/wc.txt");
            if (url == null) {
                throw new IllegalStateException("Resource /wc.txt was not found on the classpath");
            }

            SparkConf conf = new SparkConf();
            conf.setMaster("local[*]").setAppName("wc");

            // JavaSparkContext is Closeable; try-with-resources stops the context
            // even when the job throws, so it is never leaked.
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                // NOTE: URL.getPath() returns the path without percent-decoding, so
                // a resource directory containing spaces or non-ASCII characters
                // will not resolve to an existing file.
                JavaRDD<String> lineRDD = sc.textFile(url.getPath());

                // Split each line into words. Trimming first and splitting on runs
                // of whitespace keeps empty strings from being counted as words.
                JavaRDD<String> wordRDD = lineRDD.flatMap(new FlatMapFunction<String, String>() {
                    @Override
                    public Iterator<String> call(String line) throws Exception {
                        String trimmed = line.trim();
                        if (trimmed.isEmpty()) {
                            // split() on an empty string would yield a single "" token.
                            return Arrays.<String>asList().iterator();
                        }
                        return Arrays.asList(trimmed.split("\\s+")).iterator();
                    }
                });

                // Pair every word with an initial count of 1.
                JavaPairRDD<String, Integer> pairRDD = wordRDD.mapToPair(new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String word) throws Exception {
                        return new Tuple2<String, Integer>(word, 1);
                    }
                });

                // Sum the counts of identical words.
                JavaPairRDD<String, Integer> resultRDD = pairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer v1, Integer v2) throws Exception {
                        return v1 + v2;
                    }
                });

                // Sort by word (descending), bring the result to the driver and print it.
                for (Tuple2<String, Integer> wc : resultRDD.sortByKey(false).collect()) {
                    System.out.println(wc._1 + " : " + wc._2);
                }
            }
        }
    }

    Java 版本的写法太繁琐了。

  • 相关阅读:
    POJ3070 Fibonacci[矩阵乘法]【学习笔记】
    NOIP模拟赛20161023
    洛谷P2964 [USACO09NOV]硬币的游戏A Coin Game
    Jmeter之Bean shell使用
    jmeter 之 BeanShell PostProcessor跨线程全局变量使用
    详解JMeter函数和变量
    性能测试指标(图表)
    http协议基础(十一)http与https
    http协议进阶(六)代理
    http协议进阶(五)连接管理
  • 原文地址:https://www.cnblogs.com/kpwong/p/14036123.html
Copyright © 2020-2023  润新知