4. Install Spark
4.1. Prepare the directories
tar -xf /home/hadoop/install/spark-2.1.0-bin-without-hadoop.tgz -C /opt/cloud/packages/
ln -s /opt/cloud/packages/spark-2.1.0-bin-without-hadoop /opt/cloud/bin/spark
ln -s /opt/cloud/packages/spark-2.1.0-bin-without-hadoop/conf /opt/cloud/etc/spark
mkdir -p /opt/cloud/logs/spark
mkdir -p /opt/cloud/data/spark
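Before moving on, a quick sanity check of the layout (using only the paths created above):
ls -ld /opt/cloud/bin/spark /opt/cloud/etc/spark    # both should be symlinks into /opt/cloud/packages
ls -ld /opt/cloud/logs/spark /opt/cloud/data/spark  # both should be empty directories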
4.2. Set the environment variables
vi ~/.bashrc
export SPARK_HOME=/opt/cloud/bin/spark
export PATH=$SPARK_HOME/bin:$PATH
Apply immediately:
source ~/.bashrc
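A quick way to verify that the variables took effect:
echo $SPARK_HOME     # should print /opt/cloud/bin/spark
which spark-submit   # should resolve under /opt/cloud/bin/spark/bin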
4.3. Edit the configuration files
4.3.1. spark-env.sh
cd /opt/cloud/etc/spark
mv spark-env.sh.template spark-env.sh
vi spark-env.sh
export JAVA_HOME=/usr/lib/jvm/java
export HADOOP_HOME=/opt/cloud/bin/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
export SPARK_LOCAL_DIRS=/opt/cloud/data/spark
export SPARK_LOG_DIR=/opt/cloud/logs/spark
export SPARK_PID_DIR=/opt/cloud/hdfs/tmp
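SPARK_DIST_CLASSPATH is the key line here: the -without-hadoop build ships no Hadoop classes of its own, so Spark borrows them from the local Hadoop installation at launch. To inspect what `hadoop classpath` will contribute, one entry per line:
hadoop classpath | tr ':' '\n'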
4.3.2. spark-defaults.conf
mv spark-defaults.conf.template spark-defaults.conf
vi spark-defaults.conf
spark.driver.memory 512m
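To confirm that spark-defaults.conf is being picked up, one option is to read the value back from a running shell; a minimal check, assuming spark-shell starts cleanly in this environment:
spark-shell
scala> sc.getConf.get("spark.driver.memory")   // expected: 512m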
4.4. Test
Test YARN mode by temporarily setting the MASTER environment variable:
export MASTER=yarn
./bin/run-example SparkPi 1000
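The MASTER variable is just a shortcut; the equivalent explicit spark-submit invocation is sketched below (the examples jar name follows the 2.1.0 layout and may differ in other builds):
cd $SPARK_HOME
spark-submit --class org.apache.spark.examples.SparkPi \
    --master yarn --deploy-mode client \
    examples/jars/spark-examples_2.11-2.1.0.jar 1000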
4.5. Uninstall
Restore the environment variables by deleting the Spark-related lines:
vi ~/.bashrc
Delete the temporary data and directories:
rm /opt/cloud/bin/spark
rm -rf /opt/cloud/etc/spark
rm -rf /opt/cloud/packages/spark-2.1.0-bin-without-hadoop/
rm -rf /opt/cloud/data/spark/
rm -rf /opt/cloud/logs/spark/
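As an optional final check, nothing Spark-related should remain under /opt/cloud:
ls /opt/cloud/bin /opt/cloud/etc /opt/cloud/packages | grep -i spark   # should print nothing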