• Spark Java版本wordCount


    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.*;
    import scala.Tuple2;
    
    import java.net.URL;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;
    
    /**
     * Minimal Spark word-count example using the Java API.
     *
     * <p>Reads the classpath resource {@code /wc.txt}, splits every line on a
     * single space, counts how often each token occurs and prints the counts
     * to standard output, ordered by token in descending order.
     */
    public class wordcount{

        /**
         * Runs the word-count job on a local Spark master ({@code local[*]}).
         *
         * @param args command-line arguments; not used
         * @throws IllegalStateException if {@code /wc.txt} cannot be found on the classpath
         */
        public static void main(String[] args) {

            // Resolve the input before starting Spark so that a missing resource
            // fails fast with a clear message instead of a NullPointerException.
            URL url = wordcount.class.getResource("/wc.txt");
            if (url == null) {
                throw new IllegalStateException("Resource /wc.txt not found on the classpath");
            }

            SparkConf conf = new SparkConf();
            conf.setMaster("local[*]").setAppName("wc");

            // try-with-resources closes (and thereby stops) the context even
            // when one of the transformations or actions below throws.
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                JavaRDD<String> lineRDD = sc.textFile(url.getPath());

                // One input line -> many tokens. Splitting on a single space means
                // consecutive spaces produce empty tokens, which are counted too.
                JavaRDD<String> flatRDD = lineRDD.flatMap(new FlatMapFunction<String, String>() {
                    @Override
                    public Iterator<String> call(String line) throws Exception {
                        List<String> list = Arrays.asList(line.split(" "));
                        return list.iterator();
                    }
                });

                // Pair every token with an initial count of 1.
                JavaPairRDD<String, Integer> mapRDD = flatRDD.mapToPair(new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) throws Exception {
                        return new Tuple2<String, Integer>(s, 1);
                    }
                });

                // Sum the counts of identical tokens.
                JavaPairRDD<String, Integer> resultRDD = mapRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer v1, Integer v2) throws Exception {
                        return v1 + v2;
                    }
                });

                // sortByKey(false) orders by token, descending; collect() brings
                // the whole result to the driver, which is fine for a small demo file.
                for (Tuple2<String, Integer> wc : resultRDD.sortByKey(false).collect()) {
                    System.out.println(wc._1 + " : " + wc._2.toString());
                }
            }
        }
    }

    太烦了

  • 相关阅读:
    SIP语音对讲从零到整之(二)resiprocate编译(Linux,库用于Android)
    Intel Media SDK decode_sample剖析(一)——初始化及解码数据头
    Hadoop-HA集群搭建-rehl7.4
    (1)jmeter录制脚本(使用代理的方式)
    Java自动化环境搭建笔记(3)
    Java自动化环境搭建笔记(1)
    ant安装(Windows)
    jmeter源码环境(IDEA)
    离线安装docker(RedHat7.4)
    PostgreSQL日志分析工具
  • 原文地址:https://www.cnblogs.com/kpwong/p/14036123.html
Copyright © 2020-2023  润新知