Spark Streaming: read from HDFS, count the words in the files, and write the results to MySQL
package com.yeliang;

import java.sql.Connection;
import java.sql.Statement;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

public class SparkStreamTest {
    public static void main(String[] args) {
        // Run locally
        SparkConf conf = new SparkConf().setMaster("local[1]").setAppName("xxzx");
        // Create a micro-batch every 5 seconds
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
        // Monitor an HDFS directory; each new file that appears is read as a stream of lines
        JavaDStream<String> lines = jssc.textFileStream("hdfs://n1:9000/wordcount_dir");
        // Split each line into words (Spark 1.x FlatMapFunction returns an Iterable)
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;
            public Iterable<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" "));
            }
        });
        // Map each word to a (word, 1) pair
        JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });
        // Sum the counts for each word within the batch
        JavaPairDStream<String, Integer> result = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        result.print();
        // Write each batch to MySQL, using one connection per partition
        result.foreachRDD(new VoidFunction<JavaPairRDD<String, Integer>>() {
            private static final long serialVersionUID = 1L;
            @Override
            public void call(JavaPairRDD<String, Integer> rdd) throws Exception {
                rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Integer>>>() {
                    private static final long serialVersionUID = 1L;
                    @Override
                    public void call(Iterator<Tuple2<String, Integer>> records) throws Exception {
                        Connection conn = ConnectionPool.getConnection();
                        Statement stat = conn.createStatement();
                        while (records.hasNext()) {
                            Tuple2<String, Integer> wordCount = records.next();
                            String sql = "insert into wordcount(word,count) values('"
                                    + wordCount._1() + "'," + wordCount._2() + ")";
                            stat.addBatch(sql);
                        }
                        stat.executeBatch();
                        stat.close();
                        ConnectionPool.returnConnection(conn);
                    }
                });
            }
        });
        jssc.start();
        jssc.awaitTermination();
        jssc.close();
    }
}
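The ConnectionPool class used above is not shown in the original. Below is a minimal sketch of what it might look like, assuming the MySQL Connector/J 5.x driver is on the classpath and the target database already contains the wordcount table; the JDBC URL, user, and password are placeholders, and a simple synchronized LinkedList stands in for a real pool such as DBCP or HikariCP.

package com.yeliang;

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.LinkedList;

// Minimal connection pool sketch. Assumes MySQL Connector/J is on the classpath
// and that the table exists, e.g.:
//   CREATE TABLE wordcount (word VARCHAR(255), count INT);
// The URL, user, and password below are placeholders; adjust for your environment.
public class ConnectionPool {
    private static final LinkedList<Connection> pool = new LinkedList<Connection>();

    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    // Hand out a pooled connection, creating a new one if the pool is empty
    public static synchronized Connection getConnection() throws Exception {
        if (pool.isEmpty()) {
            return DriverManager.getConnection(
                    "jdbc:mysql://n1:3306/test", "root", "password");
        }
        return pool.removeFirst();
    }

    // Return a connection to the pool for reuse instead of closing it
    public static synchronized void returnConnection(Connection conn) {
        if (conn != null) {
            pool.addLast(conn);
        }
    }
}

Two usage notes: textFileStream only picks up files that appear in the monitored directory after the job starts, so to test, copy a file in with something like hdfs dfs -put wordcount.txt /wordcount_dir (put writes to a temporary name and renames on completion, so the file becomes visible atomically). Also, building SQL by string concatenation as the original does will break on words containing quotes; a PreparedStatement with bound parameters would be the safer choice.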