• Spark之开发代码 _pom 配置


    1.spark on yarn

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._
    
    
    
    /**
     * Word count over a text file, meant to be submitted to YARN with `spark-submit`.
     *
     * No master or application name is set in code: the no-argument `SparkContext`
     * constructor loads its settings from system properties, which is how
     * `spark-submit` hands over `--master`, `--name` and friends.
     *
     * Usage: SaprkOnYarn <input path> <output path>
     *
     * Created by zzy on 8/27/15.
     */
    object SaprkOnYarn {

      /**
       * Counts space-separated tokens in the input and writes `(token, count)` pairs.
       *
       * @param args `args(0)` is the input text path, `args(1)` is the output directory
       */
      def main(args: Array[String]): Unit = {
        if (args.length != 2) {
          // Wrong usage is a failure: report on stderr and exit non-zero so that
          // wrapper scripts and schedulers do not record the run as successful.
          System.err.println("Usage: SaprkOnYarn <input path> <output path>")
          System.exit(1)
        }

        val sc = new SparkContext
        try {
          val lines = sc.textFile(args(0))
          lines.cache()

          val counts = lines
            .flatMap(_.split(" "))
            .map(word => (word, 1))
            .reduceByKey(_ + _)

          counts.saveAsTextFile(args(1))
        } finally {
          // Release the application's cluster resources even when the job fails.
          sc.stop()
        }
      }
    }
    

    2.spark on standalone

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._
    
    /**
     * Word count over a text file, run against a fixed Spark standalone master.
     *
     * Usage: SsdTest <input path> <output path>
     *
     * Created by zzy on 8/27/15.
     */
    object SsdTest {

      /**
       * Counts space-separated tokens in the input and writes `(token, count)` pairs.
       *
       * @param args `args(0)` is the input text path, `args(1)` is the output directory
       */
      def main(args: Array[String]): Unit = {
        if (args.length != 2) {
          // Wrong usage is a failure: report on stderr and exit non-zero so that
          // wrapper scripts and schedulers do not record the run as successful.
          System.err.println("Usage: SsdTest <input path> <output path>")
          System.exit(1)
        }

        // NOTE(review): master URL and Spark home are hard-coded for one cluster;
        // they are kept as-is so existing launch procedures continue to work.
        val conf = new SparkConf()
          .setMaster("spark://192.168.122.213:7077")
          .setSparkHome("/usr/local/spark/spark-1.4.1-bin-hadoop2.6")
          .setAppName("StandaloneSparktest")
          // SPARK_EXECUTOR_MEMORY is an environment variable name, not a SparkConf
          // key; the configuration property for executor heap size is
          // spark.executor.memory.
          .set("spark.executor.memory", "1g")

        val sc = new SparkContext(conf)
        try {
          val lines = sc.textFile(args(0))
          lines.cache()

          val counts = lines
            .flatMap(_.split(" "))
            .map(word => (word, 1))
            .reduceByKey(_ + _)

          counts.saveAsTextFile(args(1))
        } finally {
          // Release the application's cluster resources even when the job fails.
          sc.stop()
        }
      }
    }
    

    3.pom 文件配置

    <!-- Extra artifact repositories consulted in addition to Maven Central.
         All URLs use https: Maven 3.8.1 and later block plain-http external
         repositories by default, so http entries would fail to resolve. -->
    <repositories>
        <repository>
            <id>Akka repository</id>
            <url>https://repo.akka.io/releases</url>
        </repository>
        <!-- Hosts the CDH-flavoured Spark/Hadoop artifacts (versions ending in -cdhX.Y.Z). -->
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/.</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>https://repository.jboss.org/nexus/content/groups/public-jboss</url>
        </repository>
        <repository>
            <id>Sonatype snapshots</id>
            <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
        </repository>
    </repositories>
    
    <build>
        <!-- NOTE(review): main and test sources share the same root directory, so
             every file under src/ is compiled in both phases; confirm this is intended. -->
        <sourceDirectory>src/</sourceDirectory>
        <testSourceDirectory>src/</testSourceDirectory>

        <plugins>
            <!-- Compiles Scala main and test sources. -->
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <!-- Pinned so the build is reproducible and Maven does not warn about
                     a missing plugin version. -->
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>2.10.3</scalaVersion>
                </configuration>
            </plugin>

            <!-- Builds a single self-contained jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <!-- Drop signature files from every dependency: they no
                                     longer match the contents of the merged jar. -->
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>

                                <!-- Concatenate all reference.conf files instead of
                                     letting one dependency's copy overwrite the others. -->
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>

                                <!-- NOTE(review): no mainClass or manifest entries are
                                     configured here; confirm whether any are needed. -->
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                </transformer>

                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    
    <!-- Spark and Hadoop dependencies, using CDH-flavoured versions.
         NOTE(review): the versions below are not aligned. Four different Spark
         versions appear (1.1.0-cdh5.2.0-SNAPSHOT, 1.2.0-cdh5.3.2, 1.2.0-cdh5.3.3,
         1.3.0-cdh5.4.0) and hadoop-client is on cdh5.3.0. Presumably they should
         all target one CDH release; confirm which coordinates exist before changing. -->
    <dependencies>
        <!-- Spark core and supporting modules -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.5.0-cdh5.3.0</version>
        </dependency>
        <!-- NOTE(review): SNAPSHOT version from an older Spark line than the rest. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-tools_2.10</artifactId>
            <version>1.1.0-cdh5.2.0-SNAPSHOT</version>
        </dependency>
        <!-- NOTE(review): presumably the assembly already bundles the individual
             Spark modules listed here; verify it is needed alongside them. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-assembly_2.10</artifactId>
            <version>1.2.0-cdh5.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-repl_2.10</artifactId>
            <version>1.2.0-cdh5.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-catalyst_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-network-common_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
    
        <!-- Spark on YARN -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.10</artifactId>
            <version>1.2.0-cdh5.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-network-yarn_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
    
        <!-- Spark SQL and Hive integration -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive-thriftserver_2.10</artifactId>
            <version>1.2.0-cdh5.3.3</version>
        </dependency>
    
        <!-- Spark Streaming and its Flume / Kafka connectors -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-flume_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-flume-sink_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka_2.10</artifactId>
            <version>1.2.0-cdh5.3.2</version>
        </dependency>
        <!-- NOTE(review): 1.3.0-cdh5.4.0 is a newer Spark line than spark-streaming above. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-assembly_2.10</artifactId>
            <version>1.3.0-cdh5.4.0</version>
        </dependency>
    </dependencies>

    仅此献给努力的你我!

    
    
  • 相关阅读:
    Restful、SOAP、RPC、SOA、微服务之间的区别
    SOA(面向服务的架构.)、RPC(远程过程调用)思想
    facade层,service 层,domain层,dao 层设计
    Mac下配置alias,zsh终端命令别名
    .bash_profile 和.zshrc
    什么是零担物流?零担物流的五大特点
    零担是什么意思,零担物流和快递有什么区别
    idea vm options
    idea中 VM options配置
    ES配置生命周期策略
  • 原文地址:https://www.cnblogs.com/chaoren399/p/4765181.html
Copyright © 2020-2023  润新知