查看spark版本 spark-submit --version
查看 Hadoop 版本:hadoop version
下载spark 对应版本 spark-2.0.2
下载 IDEA 的 Scala 插件:https://plugins.jetbrains.com/plugin/1347-scala
package com.bj58
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
/**
 * Word-count Spark job.
 *
 * Reads the text file given as the first argument, splits every line on a
 * single space, counts the occurrences of each resulting token, prints the
 * (token, count) pairs and saves them as text to the path given as the
 * second argument.
 *
 * Usage: `<infile> <outfile>`
 */
object App {

  /**
   * Program entry point.
   *
   * @param args `args(0)` is the input path, `args(1)` is the output path.
   *             When fewer than two arguments are supplied a usage message is
   *             written to stderr and the JVM exits with status 1.
   */
  def main(args: Array[String]): Unit = {
    println("Hello World!")

    // Earlier experiment, kept for reference: count the lines of a README that
    // contain "a" and "b". logFile should point into your Spark installation directory.
    //
    // val logFile = "/usr/local/spark/spark-1.3.1-bin-hadoop2.6/README.md"
    // val conf = new SparkConf().setAppName("App")
    // val sc = new SparkContext(conf)
    // val logData = sc.textFile(logFile, 2).cache()
    // val numAs = logData.filter(line => line.contains("a")).count()
    // val numBs = logData.filter(line => line.contains("b")).count()
    // println("Lines with a: %s,Lines with b: %s".format(numAs, numBs))

    if (args.length < 2) {
      System.err.println("Usage: <infile> <outfile>")
      System.exit(1)
    }

    val conf = new SparkConf().setAppName("App")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile(args(0))
      val counts = lines
        .flatMap(_.split(" "))
        .map((_, 1))
        .reduceByKey(_ + _)

      // collect() pulls every pair to the driver before printing.
      counts.collect().foreach(println)
      counts.saveAsTextFile(args(1))
    } finally {
      // Stop the context even when the job fails.
      sc.stop()
    }
  }
}
打包并启动jar
# Submit the packaged jar to YARN in cluster mode.
# Alternative queue: --queue root.online.hdp_teu_dia
# --class must be the fully qualified name of the main object
# (package com.bj58, object App in the Scala source).
# Each line ends with a backslash so the whole invocation is one command.
$sparkbin --class "com.bj58.App" \
  --master yarn \
  --deploy-mode cluster \
  --queue root.offline.normal \
  --name Test \
  --executor-memory 10G \
  --num-executors 20 \
  --executor-cores 2 \
  --driver-memory 10g \
  ./spark.jar "${inputpath}" "${Outpath}"
终端输出:
17/05/09 16:53:37 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: RUNNING)
17/05/09 16:53:38 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: FINISHED)
17/05/09 16:53:38 INFO yarn.Client main:
client token: N/A
diagnostics: N/A
ApplicationMaster host: 10.126.14.136
ApplicationMaster RPC port: 0
queue: root.offline.normal
start time: 1494319993385
final status: SUCCEEDED
tracking URL: http://tjtx-81-187.org:9088/proxy/application_1491903146022_2119985/history/application_1491903146022_2119985/1
user: hdp_teu_dia
17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Shutdown hook called
17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Deleting directory /tmp/spark-79598c10-7db4-4ead-9a44-3ce7681c2cee
done:20170509 16:53:38
http://tjtx-81-187.org:9088/cluster/apps