• kafka-->spark-->phoenix


    I. Create a new Maven project in IDEA:

    1. [pom.xml]:

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com</groupId>
    <artifactId>SparkPhoenixDemo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>${project.artifactId}</name>
    <description>My wonderful Scala app</description>
    <inceptionYear>2010</inceptionYear>
    <licenses>
    <license>
    <name>My License</name>
    <url>http://....</url>
    <distribution>repo</distribution>
    </license>
    </licenses>
    <properties>
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.tools.version>2.11</scala.tools.version>
    <scala.version>2.11.12</scala.version>
    </properties>

    <dependencies>
    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
    <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-core</artifactId>
    <version>2.6.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
    <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.6.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
    <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-annotations</artifactId>
    <version>2.6.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.11.0.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-kafka-0-8 -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-graphx -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-graphx_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.4.8</version>
    </dependency>
    <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.4.8</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-common -->
    <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.4.8</version>
    </dependency>
    <!--<dependency>-->
    <!--<groupId>org.apache.hbase</groupId>-->
    <!--<artifactId>hbase-mapreduce</artifactId>-->
    <!--<version>2.0.0</version>-->
    <!--</dependency>-->
    <dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.39</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-spark -->
    <dependency>
    <groupId>org.apache.phoenix</groupId>
    <artifactId>phoenix-spark</artifactId>
    <version>4.14.0-HBase-1.4</version>
    </dependency>
    <dependency>
    <groupId>com.lmax</groupId>
    <artifactId>disruptor</artifactId>
    <version>3.3.8</version>
    </dependency>
    <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>${scala.version}</version>
    </dependency>
    <!-- flink -->
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.6.1</version>
    </dependency>
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.6.1</version>
    </dependency>
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.6.1</version>
    </dependency>
    <!-- spark-mllib -->
    <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>2.3.0</version>
    </dependency>
    <dependency>
    <groupId>org.jblas</groupId>
    <artifactId>jblas</artifactId>
    <version>1.2.3</version>
    </dependency>
    </dependencies>
    <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
    <plugin>
    <!-- see http://davidb.github.com/scala-maven-plugin -->
    <groupId>net.alchim31.maven</groupId>
    <artifactId>scala-maven-plugin</artifactId>
    <version>3.1.3</version>
    <executions>
    <execution>
    <goals>
    <goal>compile</goal>
    <goal>testCompile</goal>
    </goals>
    <configuration>
    <args>
    <arg>-dependencyfile</arg>
    <arg>${project.build.directory}/.scala_dependencies</arg>
    </args>
    </configuration>
    </execution>
    </executions>
    </plugin>
    <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-surefire-plugin</artifactId>
    <version>2.13</version>
    <configuration>
    <useFile>false</useFile>
    <disableXmlReport>true</disableXmlReport>
    <!-- If you have classpath issues like NoClassDefFoundError,... -->
    <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
    <includes>
    <include>**/*Test.*</include>
    <include>**/*Suite.*</include>
    </includes>
    </configuration>
    </plugin>
    </plugins>
    </build>
    </project>
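
    Before the streaming job can write anything, the target table has to exist in Phoenix. Below is a minimal sketch of creating it over JDBC; the assumptions here are that the Phoenix JDBC driver comes in transitively through the phoenix-spark dependency above and that the ZooKeeper quorum is slave1:2181, matching the zkUrl used later in this post (the same DDL can also be run from sqlline.py):

    import java.sql.DriverManager

    object CreatePhoenixTable {
      def main(args: Array[String]): Unit = {
        // Connect through the Phoenix JDBC driver (auto-registered when it is on the classpath)
        val conn = DriverManager.getConnection("jdbc:phoenix:slave1:2181")
        val stmt = conn.createStatement()
        // Columns match what saveToPhoenix writes below: ID, NAME, CREATETIME
        stmt.executeUpdate(
          """CREATE TABLE IF NOT EXISTS TEST1.STUDENT (
            |  ID VARCHAR NOT NULL PRIMARY KEY,
            |  NAME VARCHAR,
            |  CREATETIME VARCHAR
            |)""".stripMargin)
        conn.commit()
        stmt.close()
        conn.close()
      }
    }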


    II. Create a new class SparkKafkaconsumer:
    import kafka.serializer.StringDecoder
    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Duration, StreamingContext}
    import org.apache.spark.streaming.kafka.KafkaUtils

    object SparkKafkaconsumer {
    def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
    .setMaster("local[*]")
    .setAppName("SparkStreamingKafka_Direct")
    sparkConf.set("spark.streaming.backpressure.enabled","true")
    val ssc = new StreamingContext(sparkConf,Duration(5000))
    val topics=Set("test3") //我们需要消费的kafka数据的topic
    val kafkaParams=Map(
    "metadata.broker.list"->"slave1:9092,slave2:9092,slave3:9092",// kafka的broker list地址
    "group.id"->"Kafka_Direct"
    )

    // Create a direct stream that pulls data from Kafka
    val messages = KafkaUtils.createDirectStream[String,String,StringDecoder,StringDecoder](ssc,kafkaParams,topics)

    // Extract the message value from each (key, value) pair
    val lines = messages.map(_._2)

    // Note: collect() pulls each micro-batch back to the driver before writing
    lines.foreachRDD(
    rdd => {
    val data = rdd.collect
    data.foreach(record => {
    SparkConnectionScalaNew.runmain(record,ssc)
    })
    }
    )


    ssc.start() // actually start the streaming job
    ssc.awaitTermination() // block until termination
    }
    }
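
    To give the job something to consume, a quick test producer can push the expected JSON onto the test3 topic. This is a sketch using the kafka-clients dependency already in the pom; the broker list and topic name are the same assumptions as above:

    import java.util.Properties

    import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

    object TestProducer {
      def main(args: Array[String]): Unit = {
        val props = new Properties()
        props.put("bootstrap.servers", "slave1:9092,slave2:9092,slave3:9092")
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
        val producer = new KafkaProducer[String, String](props)
        // Same JSON shape that SparkKafkaMessNew parses: {"hello":"aa"}
        producer.send(new ProducerRecord[String, String]("test3", """{"hello":"aa"}"""))
        producer.close()
      }
    }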


    III. Create a new class: SparkConnectionScalaNew

    import java.util.UUID

    import org.apache.spark.streaming.StreamingContext

    object SparkConnectionScalaNew {
    def runmain(keyValues: String, ssc: StreamingContext): Unit = {
    val uuid = UUID.randomUUID().toString // UUID used as the Phoenix row key (ID column)
    SparkKafkaMessNew.messMain(keyValues, ssc, uuid)
    }
    }


    IV. Create a new class: SparkKafkaMessNew
    import java.text.SimpleDateFormat
    import java.util.Date

    import org.apache.spark.streaming.StreamingContext

    import scala.util.parsing.json.JSON
    import org.apache.phoenix.spark._

    object SparkKafkaMessNew {

    // Unwrap the Map from JSON.parseFull's Option result; fall back to an empty map if parsing failed
    def regJson(json: Option[Any]): Map[String, Any] = json match {
    case Some(map: Map[String, Any] @unchecked) => map
    case _ => Map.empty[String, Any]
    }


    def messMain(mess: String, ssc: StreamingContext, uuid: String): Unit = {
    val jsonS = JSON.parseFull(mess)
    val first = regJson(jsonS)

    println(mess) // {"hello":"aa"}
    println(first) // Map(hello -> aa)

    // Extract the "hello" field; e.g. {"hello":"aa"} yields "aa"
    val name = first.get("hello").map(_.toString).getOrElse("").trim

    val date = new Date()
    val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateRe = df.format(date)



    val sc = ssc.sparkContext

    // saveToPhoenix comes from the org.apache.phoenix.spark._ import
    val dataSet = List((uuid, name, dateRe))
    sc.parallelize(dataSet).saveToPhoenix(
    "TEST1.STUDENT", // table name
    Seq("ID", "NAME", "CREATETIME"), // column names
    zkUrl = Some("slave1:2181")
    )
    }
    }
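
    To check that the rows actually landed in Phoenix, the same org.apache.phoenix.spark._ import also provides a read path on SparkContext. A minimal sketch, assuming the phoenixTableAsRDD helper of phoenix-spark 4.14 and the same zkUrl:

    import org.apache.spark.{SparkConf, SparkContext}

    import org.apache.phoenix.spark._

    object PhoenixReadCheck {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setMaster("local[*]").setAppName("PhoenixReadCheck"))
        // phoenixTableAsRDD is added to SparkContext by the phoenix-spark implicits
        val rows = sc.phoenixTableAsRDD(
          "TEST1.STUDENT",
          Seq("ID", "NAME", "CREATETIME"),
          zkUrl = Some("slave1:2181"))
        rows.collect().foreach(println) // each row comes back as a Map(column -> value)
        sc.stop()
      }
    }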
     