• 3 Hadoop 3.0 high-availability (HA) installation


    Hadoop 3.0 installation
    Node role assignment:
    vm1 namenode1 ---        zookeeper1  ZKFC1  journalnode1  resourcemanager
    vm2 namenode2 datanode1  zookeeper2  ZKFC2  journalnode2  resourcemanager
    vm3 ---       datanode2  zookeeper3  ---    journalnode3  ---
    https://blog.csdn.net/qq_31583183/article/details/89575981
    zookeeper下载:http://archive.apache.org/dist/zookeeper/
    
    1 Install the Java environment
    tar -xf jdk-8u131-linux-x64.tar.gz -C /usr/local/
    vi /etc/profile
    export JAVA_HOME=/usr/local/jdk1.8.0_131
    export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
    export PATH=${JAVA_HOME}/bin:$PATH
    source /etc/profile
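    A quick sanity check after reloading the profile (assuming the JDK really unpacked to /usr/local/jdk1.8.0_131):
    java -version      # should report java version "1.8.0_131"
    echo $JAVA_HOME    # should print /usr/local/jdk1.8.0_131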
    
    2 SSH mutual trust (passwordless login)
    192.168.1.84    vm1    namenode
    192.168.1.85    vm2    datanode
    192.168.1.86    vm3    datanode
    Create the hadoop user (needed on both the namenode and the datanodes):
    useradd hadoop
    passwd hadoop
    
    Generate a key pair on the namenode:
    # su - hadoop
    # ssh-keygen
    This creates id_rsa (private key) and id_rsa.pub (public key) under /home/hadoop/.ssh/.
    Upload the public key id_rsa.pub to each datanode node.

    On each datanode (append the namenode's id_rsa.pub to authorized_keys):
    # vi /home/hadoop/.ssh/authorized_keys
    The permissions must be strict: 700 on the .ssh directory and 600 on authorized_keys:
    chmod 700 /home/hadoop/.ssh
    chmod 600 /home/hadoop/.ssh/authorized_keys
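
    An equivalent shortcut (a sketch, assuming the hadoop user already exists on all three hosts and ssh-copy-id is installed) is to push the key and verify the passwordless login from vm1:
    ssh-copy-id hadoop@vm2
    ssh-copy-id hadoop@vm3
    ssh hadoop@vm2 hostname    # should return vm2 without asking for a password
    ssh hadoop@vm3 hostname    # should return vm3 without asking for a password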
    
    3 Build the zookeeper cluster
    zookeeper download: http://archive.apache.org/dist/zookeeper/
    https://www.aboutyun.com/thread-26880-1-1.html
    (the following steps are needed on all 3 zk machines)
    Download and unpack:
    tar -xf zookeeper-3.3.5.tar.gz -C /root/hdfs/
    cd zookeeper-3.3.5/
    Create the folder where zookeeper stores its data:
    mkdir data
    Configure the zookeeper environment variables:
    vim /etc/profile
    export JAVA_HOME=/usr/local/jdk1.8.0_131
    export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
    export ZOOKEEPER_HOME=/root/hdfs/zookeeper-3.3.5
    export PATH=$PATH:$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin
    source /etc/profile
    
    Edit the configuration file:
    cd /root/hdfs/zookeeper-3.3.5/conf
    First rename zoo_sample.cfg to zoo.cfg:
    mv zoo_sample.cfg zoo.cfg
    vi zoo.cfg

    # change dataDir to the data folder created above (the path must already exist)
    dataDir=/root/hdfs/zookeeper-3.3.5/data
    # the port at which the clients will connect
    clientPort=2181
    # one server.N entry per machine in the ensemble; the number N is reused in the myid file below
    server.1=vm1:2888:3888
    server.2=vm2:2888:3888
    server.3=vm3:2888:3888
    (apply the same configuration on the other zk nodes)
    Write each node's id into its myid file (it must match the server.N number in zoo.cfg):
    On vm1: echo 1 > /root/hdfs/zookeeper-3.3.5/data/myid
    On vm2: echo 2 > /root/hdfs/zookeeper-3.3.5/data/myid
    On vm3: echo 3 > /root/hdfs/zookeeper-3.3.5/data/myid
    
    Start and check the status (all 3 zk nodes must be started):
    zkServer.sh start
    zkServer.sh status
    
    # zkServer.sh status
    JMX enabled by default
    Using config: /root/hdfs/zookeeper-3.3.5/bin/../conf/zoo.cfg
    Mode: follower
    -----
    Mode: leader
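
    To double-check the ensemble, one option (a sketch, assuming nc is installed and clientPort is 2181) is to query each node with ZooKeeper's four-letter commands:
    echo ruok | nc vm1 2181    # a healthy server answers "imok"
    echo ruok | nc vm2 2181
    echo ruok | nc vm3 2181
    echo stat | nc vm1 2181 | grep Mode    # shows whether vm1 is leader or follower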
    
    4 Install hadoop
    # su - hadoop
    # cd  /home/hadoop/
    Download the hadoop package:
    https://archive.apache.org/dist/hadoop/common/
    https://mirrors.cnnic.cn/apache/hadoop/common/
    tar -xf hadoop-3.2.2.tar.gz
    cd /home/hadoop/hadoop-3.2.2/etc/hadoop/
    
    Edit the configuration:
    vi  hadoop-env.sh
    export JAVA_HOME=/usr/local/jdk1.8.0_131
    export HDFS_NAMENODE_USER=root
    export HDFS_DATANODE_USER=root
    export HDFS_SECONDARYNAMENODE_USER=root
    export HDFS_ZKFC_USER=root
    export HDFS_JOURNALNODE_USER=root
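
    If the YARN daemons will also run as root (as the HDFS_*_USER settings above assume), Hadoop 3.x expects the matching YARN users to be declared too; a hedged addition to the same hadoop-env.sh:
    export YARN_RESOURCEMANAGER_USER=root
    export YARN_NODEMANAGER_USER=root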
    
    vi core-site.xml
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://ns1</value>
        </property>
        <property>
            <name>dfs.nameservices</name>
            <value>ns1</value>
        </property>
    
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/hadoop/tmp</value>
        </property>
        <property>
            <name>ha.zookeeper.quorum</name>
            <value>vm1:2181,vm2:2181,vm3:2181</value>
        </property>
    </configuration>
    
    vi hdfs-site.xml
    <configuration>
        <property>
            <name>dfs.replication</name>
            <value>3</value>
        </property>
        <property>
            <name>dfs.nameservices</name>
            <value>ns1</value>
        </property>
        <property>
            <name>dfs.ha.namenodes.ns1</name>
            <value>nn1,nn2</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-address.ns1.nn1</name>
            <value>vm1:8020</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-address.ns1.nn2</name>
            <value>vm2:8020</value>
        </property>
        <property>
            <name>dfs.namenode.http-address.ns1.nn1</name>
            <value>vm1:50070</value>
        </property>
        <property>
            <name>dfs.namenode.http-address.ns1.nn2</name>
            <value>vm2:50070</value>
        </property>
        <property>
            <name>dfs.namenode.shared.edits.dir</name>
            <value>qjournal://vm1:8485;vm2:8485;vm3:8485/ns1</value>
        </property>
        <property>
            <name>dfs.client.failover.proxy.provider.ns1</name>
            <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
        <property>
            <name>dfs.ha.fencing.methods</name>
            <value>sshfence</value>
        </property>
        <property>
            <name>dfs.ha.fencing.ssh.private-key-files</name>
            <value>/root/.ssh/id_rsa</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>/hadoop/namedata</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>/hadoop/data</value>
        </property>
        <property>
            <name>dfs.journalnode.edits.dir</name>
            <value>/hadoop/journalnode</value>
        </property>
        <property>
            <name>dfs.ha.automatic-failover.enabled</name>
            <value>true</value>
        </property>
    </configuration>
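
    The local paths referenced above (hadoop.tmp.dir, dfs.namenode.name.dir, dfs.datanode.data.dir, dfs.journalnode.edits.dir) must exist before formatting; a sketch, assuming the /hadoop layout used in these files, run on every node:
    mkdir -p /hadoop/tmp /hadoop/namedata /hadoop/data /hadoop/journalnode
    chown -R hadoop:hadoop /hadoop    # only needed if the daemons run as the hadoop user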
    
    
    vi mapred-site.xml
    <configuration>
        <property>
            <name>mapreduce.framework.name</name>  
            <value>yarn</value>  
        </property>
    </configuration>
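
    With Hadoop 3.x, MapReduce jobs submitted to YARN usually also need to know where the MapReduce framework lives; a hedged addition to the same mapred-site.xml (assuming the install path /home/hadoop/hadoop-3.2.2 used above):
        <property>
            <name>yarn.app.mapreduce.am.env</name>
            <value>HADOOP_MAPRED_HOME=/home/hadoop/hadoop-3.2.2</value>
        </property>
        <property>
            <name>mapreduce.map.env</name>
            <value>HADOOP_MAPRED_HOME=/home/hadoop/hadoop-3.2.2</value>
        </property>
        <property>
            <name>mapreduce.reduce.env</name>
            <value>HADOOP_MAPRED_HOME=/home/hadoop/hadoop-3.2.2</value>
        </property>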
    
    vi yarn-site.xml
    <configuration>
    <!-- Site specific YARN configuration properties -->
        <property>  
           <name>yarn.resourcemanager.ha.enabled</name>  
           <value>true</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.cluster-id</name>  
           <value>rmcluster</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.ha.rm-ids</name>  
           <value>rm1,rm2</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm1</name>  
           <value>vm1</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm2</name>  
           <value>vm2</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.zk-address</name>  
           <value>vm1:2181,vm2:2181,vm3:2181</value>  
        </property>   
        <property>  
           <name>yarn.resourcemanager.recovery.enabled</name>  
           <value>true</value>  
        </property>   
        <property>  
           <name>yarn.resourcemanager.store.class</name>  
           <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>  
        </property>
    </configuration>
    
    vi /home/hadoop/hadoop-3.2.2/etc/hadoop/workers
    vm2
    vm3
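
    Before starting anything, the same hadoop directory (or at least its etc/hadoop configuration) has to exist on every node; one way (a sketch, relying on the passwordless ssh set up in step 2) is to copy it from vm1:
    scp -r /home/hadoop/hadoop-3.2.2 hadoop@vm2:/home/hadoop/
    scp -r /home/hadoop/hadoop-3.2.2 hadoop@vm3:/home/hadoop/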
    
    Start hadoop:
    (1) On every journalnode node, run:
        hadoop/sbin/hadoop-daemon.sh start journalnode
        jps should now show JournalNode.
    (2) On the first namenode, format the namenode, format zk, and start the namenode:
        bin/hdfs namenode -format
        bin/hdfs zkfc -formatZK
        sbin/hadoop-daemon.sh start namenode
        bin/hdfs --daemon start zkfc
        jps should show NameNode and DFSZKFailoverController.

    (3) On the second namenode, sync the metadata from the first and start it:
        bin/hdfs namenode -bootstrapStandby
        sbin/hadoop-daemon.sh start namenode
        bin/hdfs --daemon start zkfc
        jps should show NameNode and DFSZKFailoverController.

    (4) On all datanodes, start the datanode:
        sbin/hadoop-daemon.sh start datanode
        jps should show DataNode.
    
    Check the namenode state:
    # hdfs haadmin -getAllServiceState
    vm1:8020                                           active    
    vm2:8020                                           standby   
    # hdfs haadmin -getServiceState nn1
    active
    # hdfs haadmin -getServiceState nn2
    standby
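
    The steps above only bring up HDFS; the two ResourceManagers and the NodeManagers declared in yarn-site.xml still need to be started. One option (a sketch, assuming the same hadoop-3.2.2 install directory on each node):
    # on vm1 and vm2 (the hosts configured as rm1 and rm2)
    bin/yarn --daemon start resourcemanager
    # on vm2 and vm3 (the worker nodes listed in the workers file)
    bin/yarn --daemon start nodemanager
    # check which ResourceManager is active
    bin/yarn rmadmin -getServiceState rm1
    bin/yarn rmadmin -getServiceState rm2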
    
    ps -ef |grep hadoop
    root     107962      1  3 09:27 pts/3    00:01:07 /usr/local/jdk1.8.0_131/bin/java -Dproc_journalnode -Djava.net.preferIPv4Stack=true -Dyarn.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dyarn.log.file=hadoop-root-journalnode-vm1.log -Dyarn.home.dir=/root/hdfs/hadoop-3.2.2 -Dyarn.root.logger=INFO,console -Djava.library.path=/root/hdfs/hadoop-3.2.2/lib/native -Dhadoop.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dhadoop.log.file=hadoop-root-journalnode-vm1.log -Dhadoop.home.dir=/root/hdfs/hadoop-3.2.2 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Dhadoop.policy.file=hadoop-policy.xml -Dhadoop.security.logger=INFO,NullAppender org.apache.hadoop.hdfs.qjournal.server.JournalNode
    root     109743      1  6 09:31 pts/3    00:01:52 /usr/local/jdk1.8.0_131/bin/java -Dproc_namenode -Djava.net.preferIPv4Stack=true -Dhdfs.audit.logger=INFO,NullAppender -hadoop.security.logger=INFO,RFAS -Dyarn.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dyarn.log.file=hadoop-root-namenode-vm1.log -Dyarn.home.dir=/root/hdfs/hadoop-3.2.2 -Dyarn.root.logger=INFO,console -Djava.library.path=/root/hdfs/hadoop-3.2.2/lib/native -Dhadoop.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dhadoop.log.file=hadoop-root-namenode-vm1.log -Dhadoop.home.dir=/root/hdfs/hadoop-3.2.2 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Dhadoop.policy.file=hadoop-policy.xml org.apache.hadoop.hdfs.server.namenode.NameNode
    root     111512      1  3 09:34 pts/3    00:01:01 /usr/local/jdk1.8.0_131/bin/java -Dproc_zkfc -Djava.net.preferIPv4Stack=true -Dyarn.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dyarn.log.file=hadoop-root-zkfc-vm1.log -Dyarn.home.dir=/root/hdfs/hadoop-3.2.2 -Dyarn.root.logger=INFO,console -Djava.library.path=/root/hdfs/hadoop-3.2.2/lib/native -Dhadoop.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dhadoop.log.file=hadoop-root-zkfc-vm1.log -Dhadoop.home.dir=/root/hdfs/hadoop-3.2.2 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Dhadoop.policy.file=hadoop-policy.xml -Dhadoop.security.logger=INFO,NullAppender org.apache.hadoop.hdfs.tools.DFSZKFailoverController
    root      71116      1  7 09:45 pts/2    00:01:18 /usr/local/jdk1.8.0_131/bin/java -Dproc_datanode -Djava.net.preferIPv4Stack=true -Dhadoop.security.logger=ERROR,RFAS -Dyarn.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dyarn.log.file=hadoop-root-datanode-vm3.log -Dyarn.home.dir=/root/hdfs/hadoop-3.2.2 -Dyarn.root.logger=INFO,console -Djava.library.path=/root/hdfs/hadoop-3.2.2/lib/native -Dhadoop.log.dir=/root/hdfs/hadoop-3.2.2/logs -Dhadoop.log.file=hadoop-root-datanode-vm3.log -Dhadoop.home.dir=/root/hdfs/hadoop-3.2.2 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Dhadoop.policy.file=hadoop-policy.xml org.apache.hadoop.hdfs.server.datanode.DataNode