Hadoop Notes: Apache Hadoop + Spark Cluster Deployment


    Hadoop + Spark Cluster Deployment Guide

    (For multi-node file distribution and batch cluster operations, tools such as Salt or Ansible are recommended.)

    1. Cluster Planning
    Node     Hostname   IP Address     OS
    Master   centos1    192.168.0.1    CentOS 7.2
    Slave1   centos2    192.168.0.2    CentOS 7.2
    Slave2   centos3    192.168.0.3    CentOS 7.2
    2. Base Environment Configuration
    2.1 Hostname configuration
    1) Set the hostname
    Run as root on 192.168.0.1:
    hostnamectl set-hostname centos1
    Run as root on 192.168.0.2:
    hostnamectl set-hostname centos2
    Run as root on 192.168.0.3:
    hostnamectl set-hostname centos3
    2) Add host mappings
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    vim /etc/hosts
    192.168.0.1 centos1
    192.168.0.2 centos2
    192.168.0.3 centos3
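    Instead of editing each file by hand, the same mapping can be pushed to all nodes in one pass, in line with the Salt/Ansible note at the top. A minimal sketch, assuming root SSH access to the three hosts:

    # Append the cluster mapping to /etc/hosts on every node (skips nodes that already have it).
    HOSTS_BLOCK="$(printf '%s\n' '192.168.0.1 centos1' '192.168.0.2 centos2' '192.168.0.3 centos3')"
    for node in 192.168.0.1 192.168.0.2 192.168.0.3; do
      echo "$HOSTS_BLOCK" | ssh root@"$node" "grep -q centos1 /etc/hosts || cat >> /etc/hosts"
    done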

    2.2 Disable SELinux
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    sed -i '/^SELINUX/s/=.*/=disabled/' /etc/selinux/config
    setenforce 0
    2.3 Raise the maximum number of open files
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    vim /etc/security/limits.conf
    * soft nofile 65536
    * hard nofile 65536
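    The new limits only apply to fresh login sessions; after re-logging in, a quick check confirms the values set above:

    # In a new session (limits.conf does not affect shells that are already open):
    ulimit -n    # soft limit, expected: 65536
    ulimit -Hn   # hard limit, expected: 65536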
    2.4 Disable the firewall
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    systemctl disable firewalld.service
    systemctl stop firewalld.service
    systemctl status firewalld.service
    2.5 Initialize the servers
    1) Create the hadoop user and base directories
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    groupadd -g 6000 hadoop
    useradd -s /bin/bash -G hadoop -m hadoop
    passwd hadoop
    mkdir -p /usr/app/jdk
    chown -R hadoop:hadoop /usr/app
    2) Configure sudo
    Run as root on the target servers (192.168.0.1, 192.168.0.2, 192.168.0.3):
    vim /etc/sudoers.d/hadoop
    hadoop ALL=(ALL) ALL
    hadoop ALL=(ALL) NOPASSWD: ALL
    Defaults !env_reset
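    A syntax error in a sudoers drop-in can break sudo entirely, so it is worth validating the file with visudo's check mode and giving it the conventional permissions:

    visudo -cf /etc/sudoers.d/hadoop   # should report that the file parsed OK
    chmod 440 /etc/sudoers.d/hadoop    # standard permissions for sudoers drop-in files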
    3) Configure passwordless SSH login
    Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3:
    su hadoop
    ssh-keygen -t rsa
    Merge the id_rsa.pub files:
    Run as the hadoop user on 192.168.0.1:
    cat ~/.ssh/id_rsa.pub >> /home/hadoop/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys
    scp ~/.ssh/authorized_keys hadoop@192.168.0.2:/home/hadoop/.ssh
    Enter the password: hadoop
    Run as the hadoop user on 192.168.0.2:
    cat ~/.ssh/id_rsa.pub >> /home/hadoop/.ssh/authorized_keys
    scp ~/.ssh/authorized_keys hadoop@192.168.0.3:/home/hadoop/.ssh
    Enter the password: hadoop
    Run as the hadoop user on 192.168.0.3:
    cat ~/.ssh/id_rsa.pub >> /home/hadoop/.ssh/authorized_keys
    scp ~/.ssh/authorized_keys hadoop@192.168.0.1:/home/hadoop/.ssh
    scp ~/.ssh/authorized_keys hadoop@192.168.0.2:/home/hadoop/.ssh
    (this overwrites the copies distributed earlier)
    Enter the password: hadoop
    Verify as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3:
    ssh hadoop@192.168.0.1
    ssh hadoop@192.168.0.2
    ssh hadoop@192.168.0.3
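    A short loop makes the verification above explicit; with BatchMode enabled, ssh fails instead of prompting, so any remaining password prompt shows up as an error (a minimal sketch, run as hadoop on each node):

    # Every iteration should print the remote hostname without asking for a password.
    for ip in 192.168.0.1 192.168.0.2 192.168.0.3; do
      ssh -o BatchMode=yes -o StrictHostKeyChecking=no hadoop@"$ip" hostname
    done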
    3. Package Preparation
    # Upload the following packages to the servers
    jdk-8u192-linux-x64.tar.gz
    hadoop-2.8.5.tar.gz
    scala-2.11.12.tar.gz
    spark-2.4.1-bin-hadoop2.7.tar.gz
    zookeeper-3.4.5.tar.gz
    # Extract
    tar xvf hadoop-2.8.5.tar.gz -C /usr/app
    tar xvf scala-2.11.12.tar.gz -C /usr/app
    tar xvf spark-2.4.1-bin-hadoop2.7.tar.gz -C /usr/app
    tar xvf zookeeper-3.4.5.tar.gz -C /usr/app
    tar xvf jdk-8u192-linux-x64.tar.gz -C /usr/app/jdk
    cd /usr/app
    mv hadoop-2.8.5 hadoop
    mv scala-2.11.12 scala
    mv spark-2.4.1-bin-hadoop2.7 spark
    mv zookeeper-3.4.5 zookeeper
    # Configure /etc/profile
    export JAVA_HOME=/usr/app/jdk/jdk1.8.0_192
    export PATH=$JAVA_HOME/bin:$PATH
    export HADOOP_HOME=/usr/app/hadoop
    export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    export SPARK_HOME=/usr/app/spark
    export PATH=$SPARK_HOME/bin:$PATH
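    After appending these lines to /etc/profile, reload the file and sanity-check the tools; the expected versions follow from the packages listed above:

    # Reload the environment and verify the installs:
    source /etc/profile
    java -version            # expect java version "1.8.0_192"
    hadoop version           # expect Hadoop 2.8.5
    spark-submit --version   # expect version 2.4.1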
    4. ZooKeeper Cluster Deployment
    # Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3
    cd /usr/app/zookeeper/conf
    cat >> zoo.cfg << EOF
    tickTime=2000
    initLimit=10
    syncLimit=5
    dataDir=/usr/app/zookeeper/data/zookeeper
    dataLogDir=/usr/app/zookeeper/logs
    clientPort=2181
    maxClientCnxns=1000
    server.1=192.168.0.1:2888:3888
    server.2=192.168.0.2:2888:3888
    server.3=192.168.0.3:2888:3888
    EOF
    # Write 1 into myid on the master node, 2 and 3 on the slave nodes respectively
    mkdir -p /usr/app/zookeeper/data/zookeeper /usr/app/zookeeper/logs
    echo 1 > /usr/app/zookeeper/data/zookeeper/myid
    # Start
    nohup /usr/app/zookeeper/bin/zkServer.sh start &
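    Since the myid value differs per node (1, 2, 3, matching the server.N entries in zoo.cfg), a per-node sketch like the one below avoids editing the file by hand on each machine; it assumes the node's first IP is the cluster-facing one. Once all three ZooKeeper servers are up, zkServer.sh status should report one leader and two followers.

    # Derive this node's myid from its own IP address (a sketch; adjust if the host
    # has multiple interfaces and the first IP is not the cluster-facing one).
    case "$(hostname -I | awk '{print $1}')" in
      192.168.0.1) echo 1 > /usr/app/zookeeper/data/zookeeper/myid ;;
      192.168.0.2) echo 2 > /usr/app/zookeeper/data/zookeeper/myid ;;
      192.168.0.3) echo 3 > /usr/app/zookeeper/data/zookeeper/myid ;;
    esac
    # After all three nodes are started, check the ensemble state:
    /usr/app/zookeeper/bin/zkServer.sh status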
    5. Hadoop Cluster Deployment
    # Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3
    cd /usr/app/hadoop/etc/hadoop
    Add to both hadoop-env.sh and yarn-env.sh:
    export JAVA_HOME=/usr/app/jdk/jdk1.8.0_192
    Then edit the configuration files below in /usr/app/hadoop/etc/hadoop, adjusting IPs, hostnames, and directories to match your environment.

    core-site.xml

    <configuration>
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/usr/app/hadoop/tmp</value>
        </property>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://mycluster</value>
        </property>
        <property>
            <name>io.compression.codecs</name>
            <value>org.apache.hadoop.io.compress.GzipCodec,
                org.apache.hadoop.io.compress.DefaultCodec,
                org.apache.hadoop.io.compress.BZip2Codec,
                org.apache.hadoop.io.compress.SnappyCodec
            </value>
        </property>
        <property>
            <name>hadoop.proxyuser.root.hosts</name>
            <value>*</value>
        </property>
        <property>
            <name>hadoop.proxyuser.root.groups</name>
            <value>*</value>
        </property>
        <property>
            <name>ha.zookeeper.quorum</name>
            <value>192.168.0.1:2181,192.168.0.2:2181,192.168.0.3:2181</value>
        </property>
    </configuration>
    

    hdfs-site.xml

    <configuration>
        <property>
            <name>dfs.replication</name>
            <value>3</value>
        </property>
        <property>
            <name>dfs.permissions.enabled</name>
            <value>false</value>
        </property>
        <property>
            <name>dfs.nameservices</name>
            <value>mycluster</value>
        </property>
        <property>
            <name>dfs.ha.namenodes.mycluster</name>
            <value>nn1,nn2</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-address.mycluster.nn1</name>
            <value>192.168.0.1:9000</value>
        </property>
        <property>
            <name>dfs.namenode.http-address.mycluster.nn1</name>
            <value>192.168.0.1:50070</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-address.mycluster.nn2</name>
            <value>192.168.0.2:9000</value>
        </property>
        <property>
            <name>dfs.namenode.http-address.mycluster.nn2</name>
            <value>192.168.0.2:50070</value>
        </property>
        <property>
            <name>dfs.namenode.shared.edits.dir</name>
            <value>qjournal://192.168.0.1:8485;192.168.0.2:8485;192.168.0.3:8485/mycluster</value>
        </property>
        <property>
            <name>dfs.journalnode.edits.dir</name>
            <value>/usr/app/hadoop/data/journaldata</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:///usr/app/hadoop/data/dfs/nn/local</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>/usr/app/hadoop/data/dfs/dn/local</value>
        </property>
        <property>
            <name>dfs.client.failover.proxy.provider.mycluster</name>
            <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
        <property>
            <name>dfs.ha.fencing.methods</name>
            <value>shell(/bin/true)</value>
        </property>
        <property>
            <name>dfs.ha.fencing.ssh.private-key-files</name>
            <value>/home/hadoop/.ssh/id_rsa</value>
        </property>
        <property>
            <name>dfs.ha.fencing.ssh.connect-timeout</name>
            <value>10000</value>
        </property>
        <property>
            <name>dfs.ha.automatic-failover.enabled</name>
            <value>true</value>
        </property>
    </configuration>

    mapred-site.xml 

    <configuration>
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>
    </configuration>
    

    yarn-site.xml

    <configuration>
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
        </property>
        <property>
            <name>yarn.resourcemanager.ha.enabled</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.resourcemanager.cluster-id</name>
            <value>rmCluster</value>
        </property>
        <property>
            <name>yarn.resourcemanager.ha.rm-ids</name>
            <value>rm1,rm2</value>
        </property>
        <property>
            <name>yarn.resourcemanager.hostname.rm1</name>
            <value>192.168.0.1</value>
        </property>
        <property>
            <name>yarn.resourcemanager.hostname.rm2</name>
            <value>192.168.0.2</value>
        </property>
        <property>
            <name>yarn.resourcemanager.zk-address</name>
            <value>192.168.0.1:2181,192.168.0.2:2181,192.168.0.3:2181</value>
        </property>
        <property>
            <name>yarn.resourcemanager.recovery.enabled</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.resourcemanager.store.class</name>   
            <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        </property>
        <property>
            <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
            <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
        </property>
    
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
        </property>
    
        <property>
            <name>yarn.nodemanager.resource.memory-mb</name>
            <value>20480</value>
        </property>
        <property>
            <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
            <value>97.0</value>
        </property>
    </configuration>

    Add 192.168.0.1, 192.168.0.2, and 192.168.0.3 to the slaves file, as written out below.
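    Written out explicitly, the slaves file holds one worker host per line; from the Hadoop configuration directory:

    # /usr/app/hadoop/etc/hadoop/slaves: one DataNode/NodeManager host per line
    printf '%s\n' 192.168.0.1 192.168.0.2 192.168.0.3 > /usr/app/hadoop/etc/hadoop/slaves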

    Add to hadoop-env.sh:

    export JAVA_HOME=/usr/app/jdk/jdk1.8.0_192

    export HADOOP_HEAPSIZE=12288

    export HADOOP_PORTMAP_OPTS="-Xmx4096m $HADOOP_PORTMAP_OPTS"

    export HADOOP_CLIENT_OPTS="-Xmx4096m $HADOOP_CLIENT_OPTS"

    Add to yarn-env.sh:

    export JAVA_HOME=/usr/app/jdk/jdk1.8.0_192

    JAVA_HEAP_MAX=-Xmx2048m 

    # Create directories (paths match hadoop.tmp.dir, dfs.namenode.name.dir, dfs.datanode.data.dir, and dfs.journalnode.edits.dir above)
    mkdir -p /usr/app/hadoop/tmp
    mkdir -p /usr/app/hadoop/data/dfs/nn/local
    mkdir -p /usr/app/hadoop/data/dfs/dn/local
    mkdir -p /usr/app/hadoop/data/journaldata
    # Start
    Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3:
    hadoop-daemon.sh start journalnode
    Run as the hadoop user on 192.168.0.1:
    hdfs namenode -format
    hadoop-daemon.sh start namenode
    Run as the hadoop user on 192.168.0.2:
    hdfs namenode -bootstrapStandby
    Run as the hadoop user on 192.168.0.1:
    hdfs zkfc -formatZK
    Run as the hadoop user on 192.168.0.2:
    hadoop-daemon.sh start namenode
    Run as the hadoop user on 192.168.0.1 and 192.168.0.2:
    hadoop-daemon.sh start zkfc
    Run as the hadoop user on 192.168.0.1 and 192.168.0.2:
    yarn-daemon.sh start resourcemanager
    Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3:
    yarn-daemon.sh start nodemanager
    Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3:
    hadoop-daemon.sh start datanode
    # Verify
    Check the HDFS / NameNode status at http://192.168.0.1:50070
    Check the YARN cluster status at http://192.168.0.1:8088
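    In addition to the web UIs, the standard Hadoop 2.x admin commands confirm that automatic failover is wired up; nn1/nn2 and rm1/rm2 are the IDs defined in hdfs-site.xml and yarn-site.xml above:

    # One NameNode should report "active" and the other "standby":
    hdfs haadmin -getServiceState nn1
    hdfs haadmin -getServiceState nn2
    # Likewise for the two ResourceManagers:
    yarn rmadmin -getServiceState rm1
    yarn rmadmin -getServiceState rm2
    # All three DataNodes should appear in the report:
    hdfs dfsadmin -report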
    6. Spark Cluster Deployment
    # Run as the hadoop user on 192.168.0.1, 192.168.0.2, and 192.168.0.3
    cd /usr/app/spark/conf
    Add the following to spark-env.sh:

    export JAVA_HOME=/usr/app/jdk/jdk1.8.0_192
    export SCALA_HOME=/usr/app/scala
    export HADOOP_HOME=/usr/app/hadoop
    export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
    export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://mycluster/tmp/spark/event"
    export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
    export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${HADOOP_HOME}/lib/native
    export PYSPARK_PYTHON=/usr/app/python/venv/bin/python
    export PYSPARK_DRIVER_PYTHON=/usr/app/python/venv/bin/python

    Add the following to spark-defaults.conf:

    spark.master yarn
    spark.eventLog.enabled true
    spark.eventLog.dir hdfs://mycluster/tmp/spark/event
    # spark.serializer org.apache.spark.serializer.KryoSerializer
    # spark.driver.memory 5g
    # spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
    spark.yarn.jars hdfs://mycluster/tmp/spark/jars/*.jar

    Add 192.168.0.2 and 192.168.0.3 to the slaves file.

    # Upload the Spark jars and prepare the event-log directory on HDFS
    cd /usr/app/spark/jars

    hdfs dfs -mkdir -p /tmp/spark/jars

    hdfs dfs -mkdir -p /tmp/spark/event

    hdfs dfs -put *.jar /tmp/spark/jars


    # Start
    /usr/app/spark/sbin/start-all.sh
    # Verify
    /usr/app/spark/bin/spark-shell --master yarn --deploy-mode client
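    For an end-to-end check beyond the interactive shell, the bundled SparkPi example can be submitted to YARN; the example jar name below assumes the stock spark-2.4.1-bin-hadoop2.7 distribution (Scala 2.11), so adjust the path if your build differs:

    # Submit the SparkPi example to the YARN cluster:
    /usr/app/spark/bin/spark-submit \
      --master yarn \
      --deploy-mode cluster \
      --class org.apache.spark.examples.SparkPi \
      /usr/app/spark/examples/jars/spark-examples_2.11-2.4.1.jar 100
    # The "Pi is roughly 3.14..." line appears in the driver's container log,
    # reachable from the YARN UI at http://192.168.0.1:8088 or via:
    # yarn logs -applicationId <application_id>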
