• Spark 用Scala和Java分别实现wordcount


    Scala

    import org.apache.spark.{SparkConf, SparkContext}
    
    /** Spark word count: tallies how often each space-separated token occurs in a text file
      * and prints every `(word, count)` pair to standard output.
      */
    object wordcount {

      /** Runs the job against the hard-coded input file.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("wc_java").setMaster("local[*]")
        val sc = new SparkContext(conf)
        try {
          val counts = sc
            .textFile("H:/server.properties")
            .flatMap(_.split(" "))   // one record per space-separated token
            .map(word => (word, 1))  // pair every token with an initial count of 1
            .reduceByKey(_ + _)      // sum the counts per distinct token

          // Collect exactly once and print the resulting local array. The previous version
          // discarded the collected array and then ran the job a second time through
          // RDD.foreach, which executes on the executors rather than on the driver.
          counts.collect().foreach(println)
        } finally {
          // Always release the context, even when the job fails.
          sc.stop()
        }
      }
    }
    

      

    Java

    import org.apache.spark.SparkConf;
    import org.apache.spark.SparkContext;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import scala.Tuple2;

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;
    
    
    public class WordCountJava {
        public static void main(String[] args) {
            //创建SparkConf对象
            SparkConf conf = new SparkConf();
            conf.setAppName("WordCountJava2");
            conf.setMaster("local");
    
            //创建java sc
            JavaSparkContext sc = new JavaSparkContext(conf);
            //加载文本文件
            JavaRDD<String> rdd1 = sc.textFile("d:/scala//test.txt");
    
            //压扁
            JavaRDD<String> rdd2 = rdd1.flatMap(new FlatMapFunction<String, String>() {
                public Iterator<String> call(String s) throws Exception {
                    List<String> list = new ArrayList<String>();
                    String[] arr = s.split(" ");
                    for(String ss :arr){
                        list.add(ss);
                    }
                    return list.iterator();
                }
            });
    
            //映射,word -> (word,1)
            JavaPairRDD<String,Integer> rdd3 = rdd2.mapToPair(new PairFunction<String, String, Integer>() {
                public Tuple2<String, Integer> call(String s) throws Exception {
                    return new Tuple2<String, Integer>(s,1);
                }
            });
    
            //reduce化简
            JavaPairRDD<String,Integer> rdd4 = rdd3.reduceByKey(new Function2<Integer, Integer, Integer>() {
                public Integer call(Integer v1, Integer v2) throws Exception {
                    return v1 + v2;
                }
            });
    
            //
            List<Tuple2<String,Integer>> list = rdd4.collect();
            for(Tuple2<String, Integer> t : list){
                System.out.println(t._1() + " : " + t._2());
            }
        }
    }
    

      

  • 相关阅读:
    Linux Shell脚本详细教程
    linux下错误代码E212: Can't open file for writing
    github仓库的基本使用-创建、上传文件、删除
    -bash: ifconfig: command not found解决办法
    Xshell能ping通但连不上CentOS 7
    devtools和vuex mutations
    Google Chrome谷歌浏览器安装devtools
    Vuex状态管理模式
    vue Promise all
    vue Promise链式调用
  • 原文地址:https://www.cnblogs.com/Alcesttt/p/11448377.html
Copyright © 2020-2023  润新知