/** * # _*_ coding:utf-8 _*_ * # Author:xiaoshubiao * # Time : 2020/5/14 8:33 **/ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function2; import scala.Tuple2; import java.util.ArrayList; import java.util.Arrays; import java.util.List; public class union_test { public static void main(String[] args) { SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("union_test"); JavaSparkContext sc = new JavaSparkContext(conf); List<String> list = Arrays.asList("a","b","c","d","e"); List<String> list2 = Arrays.asList("a","b","c","f","h"); JavaRDD<String> parallelize = sc.parallelize(list,2); JavaRDD<String> parallelize2 = sc.parallelize(list2,2); JavaPairRDD javaPairRDD = parallelize.mapToPair(x -> new Tuple2(x, 1)); JavaPairRDD javaPairRDD1 = parallelize2.mapToPair(x -> new Tuple2(x, 2)); javaPairRDD.join(javaPairRDD1).collect().forEach(x->System.out.println("join"+x)); javaPairRDD.leftOuterJoin(javaPairRDD1).collect().forEach(x->System.out.println("leftOuterJoin"+x)); javaPairRDD.rightOuterJoin(javaPairRDD1).collect().forEach(x->System.out.println("rightOuterJoin"+x)); javaPairRDD.fullOuterJoin(javaPairRDD1).collect().forEach(x->System.out.println("fullOuterJoin"+x)); } }