• Spark cluster installation and deployment


    1. Download the Spark package from the official site

    # wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
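
    The download can optionally be verified before extracting; a minimal sketch, assuming the matching .sha512 file is published alongside the tarball on archive.apache.org:

    # wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz.sha512
    # sha512sum spark-2.4.8-bin-hadoop2.7.tgz

    Compare the printed digest with the contents of the .sha512 file before continuing.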
    

    2. Extract the archive

    # tar -zxvf spark-2.4.8-bin-hadoop2.7.tgz -C /home/hadoop/app
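
    Optionally, a version-independent symlink keeps later paths stable across upgrades; a minimal sketch (the app/spark link name is an illustration, not part of the original steps):

    # ln -s /home/hadoop/app/spark-2.4.8-bin-hadoop2.7 /home/hadoop/app/spark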
    

    3. Edit the configuration

    # cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/conf/
    # cp spark-env.sh.template spark-env.sh
    # cp slaves.template slaves
    # cp spark-defaults.conf.template spark-defaults.conf
    # vim spark-env.sh
    
    Add the following:
    export HADOOP_CONF_DIR=/home/hadoop/app/hadoop-2.7.5/etc/hadoop
    export HADOOP_HOME=/home/hadoop/app/hadoop-2.7.5
    export JAVA_HOME=/opt/jdk1.8.0_202
    export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    export SCALA_HOME=/home/hadoop/app/scala-2.11.8
    export SPARK_LOG_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs
    export SPARK_PID_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs/pid
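
    It is worth confirming that every directory referenced above actually exists on this node before continuing; a minimal check:

    # for d in /opt/jdk1.8.0_202 /home/hadoop/app/hadoop-2.7.5 /home/hadoop/app/scala-2.11.8; do [ -d "$d" ] || echo "missing: $d"; done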
    
    Edit spark-defaults.conf:
    # vim spark-defaults.conf

    Add the following:
    spark.eventLog.enabled                          true
    spark.eventLog.dir                              hdfs://ns1/spark/eventLog
    spark.rdd.compress                              true
    spark.driver.memory                             4G
    spark.yarn.historyServer.address                dba-01:18080
    spark.history.ui.port                           18080
    spark.history.fs.logDirectory                   hdfs://ns1/spark/eventLog
    spark.yarn.maxAppAttempts                       4
    spark.yarn.stagingDir                           hdfs://ns1/spark/stagingDir

    spark.yarn.singleContainerPerNode               false
    spark.yarn.allocator.waitTime                   60s
    spark.logConf                                   true
    spark.ui.killEnabled                            false
    spark.streaming.backpressure.initialRate        1000
    spark.streaming.kafka.maxRatePerPartition       10000
    spark.streaming.blockInterval                   1000
    spark.streaming.backpressure.enabled            true
    spark.streaming.receiver.maxRate                10000
    spark.streaming.kafka.maxRetries                10
    spark.default.parallelism                       64
    spark.streaming.dynamicAllocation.enabled       false
    spark.streaming.dynamicAllocation.minExecutors  1
    spark.streaming.dynamicAllocation.maxExecutors  50
    spark.shuffle.service.enabled                   true
    spark.dynamicAllocation.enabled                 true
    spark.dynamicAllocation.minExecutors            1
    spark.dynamicAllocation.maxExecutors            20
    spark.driver.maxResultSize                      4g
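
    Note that spark.dynamicAllocation.enabled together with spark.shuffle.service.enabled also assumes the external shuffle service is registered with YARN's NodeManagers (the spark_shuffle aux-service in yarn-site.xml); that is Hadoop-side configuration not covered here. To sanity-check the file just written, list its active (non-comment) lines:

    # grep -Ev '^[[:space:]]*(#|$)' spark-defaults.conf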
    
    Edit slaves:
    # vim slaves
    Add the worker hostnames:
    dba-01
    dba-02
    dba-03
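
    Note that start-all.sh in step 7 starts a Worker on each host listed here over SSH, so the hadoop user needs passwordless SSH from the node that will run it; a minimal sketch, assuming no keys have been set up yet:

    # ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
    # ssh-copy-id hadoop@dba-01
    # ssh-copy-id hadoop@dba-02
    # ssh-copy-id hadoop@dba-03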
    

    4. Create the required directories

    # cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    # mkdir -p logs/pid
    # hdfs dfs -mkdir -p /spark/stagingDir
    # hdfs dfs -mkdir -p /spark/eventLog
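
    A quick check that the HDFS paths were created:

    # hdfs dfs -ls /spark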
    

    5. Copy the installation to the other nodes

    # cd /home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-02:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-03:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-04:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-05:/home/hadoop/app
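
    Equivalently, the copies can be written as a loop:

    # for h in dba-02 dba-03 dba-04 dba-05; do scp -r spark-2.4.8-bin-hadoop2.7 hadoop@$h:/home/hadoop/app; done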
    

    6. Add the Spark environment variables (on each node)

    # vim /etc/profile
    export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    export PATH=$PATH:$SPARK_HOME/bin
    
    # source /etc/profile
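
    A quick check that the shell now resolves the Spark binaries:

    # which spark-submit
    # spark-submit --version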
    

    7. Start the Spark cluster from any one node (that node becomes the standalone master)

    # cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/sbin
    # ./start-all.sh
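
    If startup succeeded, jps on this node shows a Master process and each host in slaves shows a Worker; the master web UI listens on port 8080 by default. A minimal smoke test, assuming the master was started on dba-01 (adjust the master URL to the actual host) and the stock 2.4.8 examples jar path:

    # jps
    # spark-submit --class org.apache.spark.examples.SparkPi --master spark://dba-01:7077 $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.8.jar 100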
    