1.在官网下载spark安装包
# wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
2.解压
# tar -zxvf spark-2.4.8-bin-hadoop2.7.tgz -C /home/hadoop/app
3.修改配置
# cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/conf/
# cp spark-env.sh.template spark-env.sh
# cp slaves.template slaves
# cp spark-defaults.conf.template spark-defaults.conf
# vim spark-env.sh
添加
export HADOOP_CONF_DIR=/home/hadoop/app/hadoop-2.7.5/etc/hadoop
export HADOOP_HOME=/home/hadoop/app/hadoop-2.7.5
export JAVA_HOME=/opt/jdk1.8.0_202
export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
export SCALA_HOME=/home/hadoop/app/scala-2.11.8
export SPARK_LOG_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs
export SPARK_PID_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs/pid
修改spark-defaults.conf
# vim spark-defaults.conf
添加
spark.eventLog.enabled true
spark.eventLog.dir hdfs://ns1/spark/eventLog
spark.rdd.compress true
spark.driver.memory 4G
spark.yarn.historyServer.address dba-01:18080
spark.history.ui.port 18080
spark.history.fs.logDirectory hdfs://ns1/spark/eventLog
spark.yarn.maxAppAttempts 4
spark.yarn.stagingDir hdfs://ns1/spark/stagingDir
spark.yarn.singleContainerPerNode false
spark.yarn.allocator.waitTime 60s
spark.logConf true
spark.ui.killEnabled false
spark.streaming.backpressure.initialRate 1000
spark.streaming.kafka.maxRatePerPartition 10000
spark.streaming.blockInterval 1000
spark.streaming.backpressure.enabled true
spark.streaming.receiver.maxRate 10000
spark.streaming.kafka.maxRetries 10
spark.default.parallelism 64
spark.streaming.dynamicAllocation.enabled false
spark.streaming.dynamicAllocation.minExecutors 1
spark.streaming.dynamicAllocation.maxExecutors 50
spark.shuffle.service.enabled true
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.minExecutors 1
spark.dynamicAllocation.maxExecutors 20
spark.driver.maxResultSize 4g
修改slaves
# vim slaves
添加
dba-01
dba-02
dba-03
4.创建目录
# cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7
# mkdir -p logs/pid
# hdfs dfs -mkdir -p /spark/stagingDir
# hdfs dfs -mkdir -p /spark/eventLog
5.传输到其他节点
# cd /home/hadoop/app
# scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-02:/home/hadoop/app
# scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-03:/home/hadoop/app
# scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-04:/home/hadoop/app
# scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-05:/home/hadoop/app
6.添加spark环境变量
# vim /etc/profile
export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin
# source /etc/profile
7.任意一个节点启动spark集群
# cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/sbin
# ./start-all.sh