• hadoop 3.2.1 distributed cluster setup (HA)


    Hadoop cluster layout:

    [root@namenode ~]# cat /etc/hosts
    127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
    ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
    172.16.230.121    namenode
    172.16.230.122    standbynamenode
    172.16.230.123    datanode1
    172.16.230.124    datanode2
    172.16.230.125    datanode3
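
    Every node needs the same hostname mappings. A quick way to push this file out from namenode (a sketch, assuming root SSH access between the machines, as the user-creation loop below also relies on):

    for i in {122..125}; do scp /etc/hosts root@172.16.230.$i:/etc/hosts; done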

    Download:

    wget https://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz

    Extract and move into place:

    tar -zxvf hadoop-3.2.1.tar.gz
    mv hadoop-3.2.1  /data/hadoop

    Create the hadoop user on every node and give it ownership of /data/hadoop:

    for i in {121..125}; do ssh root@172.16.230.$i 'useradd hadoop; chown -R hadoop:hadoop /data/hadoop; echo "123456" | passwd --stdin hadoop'; done
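
    The steps below (start-all.sh and the sshfence fencing method) depend on passwordless SSH for the hadoop user, which this walkthrough assumes but does not show. A minimal sketch, run as hadoop on namenode (repeat on standbynamenode so either NameNode can fence the other):

    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
    for i in {121..125}; do ssh-copy-id hadoop@172.16.230.$i; done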

    Configure environment variables:

    [hadoop@namenode ~]$ vim .bashrc 
    
    # .bashrc
    
    # Source global definitions
    if [ -f /etc/bashrc ]; then
            . /etc/bashrc
    fi
    
    # Uncomment the following line if you don't like systemctl's auto-paging feature:
    # export SYSTEMD_PAGER=
    
    # User specific aliases and functions
    
    export JAVA_HOME=/data/jdk
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    export PATH=$JAVA_HOME/bin:$PATH
    export HADOOP_HOME=/data/hadoop
    export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
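
    After sourcing the file, a quick sanity check (assuming the JDK is already installed at /data/jdk):

    source ~/.bashrc
    hadoop version    # should report Hadoop 3.2.1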

    Configure Hadoop. Change into the configuration directory:

    [hadoop@namenode ~]$ cd /data/hadoop/etc/hadoop/

    [hadoop@namenode hadoop]$ cat hadoop-env.sh | grep -v "#"  | sed '/^$/d'
    export JAVA_HOME=/data/jdk
    export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
    export HDFS_NAMENODE_USER=hadoop
    export HDFS_DATANODE_USER=hadoop 
    export HDFS_ZKFC_USER=hadoop 
    export HDFS_JOURNALNODE_USER=hadoop
    export YARN_RESOURCEMANAGER_USER=hadoop 
    export YARN_NODEMANAGER_USER=hadoop

    Configure core-site.xml:

    [hadoop@namenode hadoop]$ cat core-site.xml 
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
        http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    
    <!-- Put site-specific property overrides in this file. -->
    
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://mycluster</value>
        </property>
        <!-- temporary directory; create /data/hadoop/tmpdata beforehand and chown it to hadoop -->
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/data/hadoop/tmpdata</value>
        </property>
        <!-- static user shown by the web UI -->
        <property> 
            <name>hadoop.http.staticuser.user</name>            
            <value>hadoop</value> 
        </property>
        <!-- ZooKeeper quorum that HA depends on -->
        <property>
            <name>ha.zookeeper.quorum</name>
            <value>kafka1:2181,kafka2:2181,kafka3:2181</value>
        </property>
    </configuration>

    hdfs-site.xml

    [hadoop@namenode hadoop]$ cat hdfs-site.xml 
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
        http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    
    <!-- Put site-specific property overrides in this file. -->
    
    <configuration>
        <property>
            <name>dfs.nameservices</name>
            <value>mycluster</value>
        </property>
        <!-- IDs of the NameNodes in this nameservice -->
        <property>
            <name>dfs.ha.namenodes.mycluster</name>
            <value>nn1,nn2</value>
        </property>
        <!-- hostname and RPC port of each NameNode -->
        <property>
            <name>dfs.namenode.rpc-address.mycluster.nn1</name>
            <value>namenode:8020</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-address.mycluster.nn2</name>
            <value>standbynamenode:8020</value>
        </property>
        <!-- hostname and HTTP port of each NameNode -->
        <property>
            <name>dfs.namenode.http-address.mycluster.nn1</name>
            <value>namenode:9870</value>
        </property>
        <property>
            <name>dfs.namenode.http-address.mycluster.nn2</name>
            <value>standbynamenode:9870</value>
        </property>
        <!-- shared edits URL (JournalNode quorum); the trailing journal ID conventionally matches the nameservice -->
        <property>
            <name>dfs.namenode.shared.edits.dir</name>
            <value>qjournal://namenode:8485;standbynamenode:8485;datanode1:8485;datanode2:8485;datanode3:8485/mycluster</value>
        </property>
        </property>
        <!-- how HDFS clients find the active NameNode -->
        <property>
            <name>dfs.client.failover.proxy.provider.mycluster</name>
            <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
    
        <!-- fencing method used when the two NameNodes switch roles -->
        <property>
            <name>dfs.ha.fencing.methods</name>
            <value>sshfence</value>
        </property>
    
        <!-- SSH private key for the fencing method; the daemons run as hadoop, so use that user's key -->
        <property>
            <name>dfs.ha.fencing.ssh.private-key-files</name>
            <value>/home/hadoop/.ssh/id_rsa</value>
        </property>
    
        <!-- directory where the JournalNodes store edits; create it on every node beforehand -->
        <property>
            <name>dfs.journalnode.edits.dir</name>
            <value>/data/hadoop/journalnode/data</value>
        </property>
        <!-- enable automatic NameNode failover for the HA cluster -->
        <property>
            <name>dfs.ha.automatic-failover.enabled</name>
            <value>true</value>
        </property>
    
        <property>
            <name>dfs.namenode.safemode.threshold-pct</name>
            <value>1</value>
        </property>
    </configuration>

    workers

    [hadoop@namenode hadoop]$ cat workers 
    datanode1
    datanode2
    datanode3

    yarn-site.xml

    [hadoop@namenode hadoop]$ cat yarn-site.xml 
    <?xml version="1.0"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
        http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    <configuration>
    
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
        <description>Enable RM high-availability</description>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster1</value>
        <description>Name of the cluster</description>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
        <description>The list of RM nodes in the cluster when HA is enabled</description>
    </property>
     <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>namenode</value>
        <description>The hostname of the rm1</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>standbynamenode</value>
        <description>The hostname of the rm2</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>namenode:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>standbynamenode:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>kafka1:2181,kafka2:2181,kafka3:2181</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
         <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    
    </configuration>

    mapred-site.xml

    [hadoop@namenode hadoop]$ cat mapred-site.xml 
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <!--
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
    
        http://www.apache.org/licenses/LICENSE-2.0
    
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License. See accompanying LICENSE file.
    -->
    
    <!-- Put site-specific property overrides in this file. -->
    
    <configuration>
    
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
    
    </configuration>

    Copy the configured tree to all nodes:

    [hadoop@namenode hadoop]$ for i in {122..125}; do scp -r  /data/hadoop/ hadoop@172.16.230.$i:/data/; done
    
    # the journalnode and tmp directories are needed on every node, not just this one
    [hadoop@namenode hadoop]$ for i in {121..125}; do ssh hadoop@172.16.230.$i "mkdir -p /data/hadoop/journalnode/data /data/hadoop/tmpdata"; done
    
    [hadoop@namenode hadoop]$  for i in {122..125}; do scp -r ~/.bashrc hadoop@172.16.230.$i:~/; done

    Startup:

    1. Start the ZooKeeper cluster first; it must be up before anything else.

    2. Start the JournalNodes

    Log in to each machine and start the journalnode (hadoop-daemon.sh is deprecated in Hadoop 3.x; the hdfs --daemon form is used here), or run it remotely in one loop, as sketched below:
    
    hdfs --daemon start journalnode
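
    The same thing from namenode in one loop (a sketch, relying on the passwordless SSH set up earlier; the full path is used because a non-interactive ssh session does not source .bashrc):

    for i in {121..125}; do ssh hadoop@172.16.230.$i "/data/hadoop/bin/hdfs --daemon start journalnode"; done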

    3. Format one NameNode (on namenode only) and start it

    hdfs namenode -format
    hdfs --daemon start namenode

    4. Start the NameNode on the standbynamenode node

    # Log in to standbynamenode, sync the metadata from the active NameNode first, then start
    hdfs namenode -bootstrapStandby
    hdfs --daemon start namenode

    5. Initialize the ZKFC state in ZooKeeper (any node)

    hdfs zkfc -formatZK

    6. Stop all Hadoop processes

    stop-dfs.sh

    7. Start everything

    start-all.sh
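
    At this point it is worth confirming that HA actually came up. These are standard Hadoop admin commands; nn1/nn2 and rm1/rm2 are the IDs defined in the configs above:

    jps                                 # each node should show its expected daemons
    hdfs haadmin -getServiceState nn1   # expect: active
    hdfs haadmin -getServiceState nn2   # expect: standby
    yarn rmadmin -getServiceState rm1   # one RM active, the other standby
    yarn rmadmin -getServiceState rm2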

    Upload test:

    [hadoop@namenode ~]$ hdfs dfs -mkdir /test
    
    [hadoop@namenode hadoop]$ hdfs dfs -put README.txt  /test
    2021-01-11 15:59:10,923 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
    
    [hadoop@namenode hadoop]$ hdfs dfs -ls  /test

    Open http://namenode:9870 (or standbynamenode:9870) in a browser to check the cluster status.
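
    A simple failover check (a sketch; assumes nn1 on namenode is currently active): stop the active NameNode, confirm the standby takes over, then bring the stopped one back:

    hdfs --daemon stop namenode         # run on namenode (the active NameNode)
    hdfs haadmin -getServiceState nn2   # run anywhere; expect: active
    hdfs --daemon start namenode        # rejoins as standby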

    Reference: https://segmentfault.com/a/1190000023834334
