• hadoop安装


    安装jdk

    rpm -ivh /opt/downloads/jdk-8u201-linux-x64.rpm

    vim /etc/profile 最下方加入

    export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
    export JAVA_BIN=/usr/java/jdk1.8.0_201-amd64/bin
    export PATH=$PATH:$JAVA_HOME/bin
    export CLASSPATH=:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

    source /etc/profile

    vim /etc/bashrc 最下方加入

    export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
    export JAVA_BIN=/usr/java/jdk1.8.0_201-amd64/bin
    export PATH=$PATH:$JAVA_HOME/bin
    export CLASSPATH=:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

    source /etc/bashrc

    下载hadoop
    wget -P /opt/downloads http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.0/hadoop-3.2.0.tar.gz
    tar zxvf /opt/downloads/hadoop-3.2.0.tar.gz -C /opt
    mv /opt/hadoop-3.2.0/ /opt/hadoop

    mkdir -p input
    cp /opt/hadoop/etc/hadoop/*.xml input
    /opt/hadoop/bin/hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.0.jar grep input output 'dfs[a-z.]+'

    创建文件夹
    mkdir /opt/hadoop/tmp
    mkdir /opt/hadoop/var
    mkdir /opt/hadoop/dfs
    mkdir /opt/hadoop/dfs/name
    mkdir /opt/hadoop/dfs/data
    mkdir /opt/hadoop/fs/checkpoint -p
    mkdir /opt/hadoop/fs/checkpoint/edits
    mkdir /opt/hadoop/jobhistory/done -p
    mkdir /opt/hadoop/jobhistory/done_intermediate -p

    查看java_home路径
    echo $JAVA_HOME

    修改配置

    文件系统

    vim /opt/hadoop/etc/hadoop/core-site.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
            <property>
                    <name>hadoop.tmp.dir</name>
                    <value>../tmp</value>
                    <description>namenode上本地的hadoop临时文件夹</description>
            </property>
            <property>
                    <name>io.file.buffer.size</name>
                    <value>262144</value>
                    <description>Size of read/write buffer used in SequenceFiles.256k</description>
            </property>
            <property>
                    <name>io.native.lib.available</name>
                    <value>true</value>
            </property>
            <property>
                    <name>fs.defaultFS</name>
                    <value>hdfs://0.0.0.0:9000</value>
                    <description>HDFS的URI,文件系统://namenode标识:端口号</description>
            </property>
    </configuration>

    name 节点用 fs.defaultFS,不建议使用 fs.default.name。

    hadoop.tmp.dir 是hadoop文件系统依赖的基础配置,很多路径都依赖它。如果下面hdfs-site.xml中不配置 namenode 和 datanode 的存放位置,默认就放在如下路径中。

    副本

    vim /opt/hadoop/etc/hadoop/hdfs-site.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
            <description>副本个数,配置默认是3,应小于datanode机器数量</description>
        </property>
        <property>
            <name>dfs.namenode.http-address</name>
            <value>0.0.0.0:9870</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>../dfs/name</value>
            <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>../dfs/data</value>
            <description>Comma separated list of paths on the localfilesystem of a DataNode where it should store its blocks.</description>
        </property>
        <property>
            <name>dfs.permissions</name>
            <value>false</value>
            <description>need not permissions</description>
        </property>
        <property>
            <name>dfs.webhdfs.enabled</name> 
            <value>true</value> 
        </property>
        <property>
            <name>dfs.block.size</name> 
            <value>134217728</value> 
            <description>HDFS blocksize of 128MB for large file-systems.used for on-line</description> 
            </property>
        <property>
            <name>fs.checkpoint.dir</name>
            <value>../fs/checkpoint</value>
        </property>
        <property>
            <name>fs.checkpoint.edits.dir</name>
            <value>../fs/checkpoint/edits</value>
        </property>
    </configuration>

    资源调度框架

    vim /opt/hadoop/etc/hadoop/mapred-site.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
            <description>设置MapReduce的运行平台为yarn。local表示本地运行,classic表示经典mapreduce框架,yarn表示新的框架。</description>
        </property>
        <property>
            <name>mapred.job.tracker</name>
            <value>0.0.0.0:9001</value>
        </property>
        <property>
            <name>mapred.local.dir</name>
            <value>../var</value>
        </property>
        <property>
            <name>mapreduce.jobhistory.address</name>
            <value>0.0.0.0:10020</value>
            <description>MapReduce JobHistory Server IPC host:port</description>
        </property>
        <property>
            <name>mapreduce.jobhistory.webapp.address</name>
            <value>0.0.0.0:19888</value>
            <description>MapReduce JobHistory Server Web UI host:port</description>
        </property>
        <property>
            <name>mapreduce.jobhistory.done-dir</name>
            <value>../jobhistory/done</value>
            <description>MapReduce作业运行完之后放在哪</description>
        </property>
        <property>
            <name>mapreduce.jobhistory.intermediate-done-dir</name>
            <value>../jobhistory/done_intermediate</value>
            <description>MapReduce正在运行中的作业放在哪</description>
        </property>
        <property>
            <name>yarn.app.mapreduce.am.resource.mb</name>
        <value>512</value>
        </property>
        <property>
            <name>mapreduce.map.memory.mb</name>
            <value>512</value>
        </property>
        <property>
            <name>mapreduce.map.java.opts</name>
            <value>-Xmx512M</value>
        </property>
        <property>
            <name>mapreduce.reduce.memory.mb</name>
            <value>512</value>
        </property>
        <property>
            <name>mapreduce.reduce.java.opts</name>
            <value>-Xmx512M</value>
        </property>
        <property>
            <name>mapred.child.java.opts</name>
            <value>-Xmx512M</value>
        </property>
    </configuration>

    vim /opt/hadoop/etc/hadoop/yarn-site.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
            <description>NodeManager上运行的附属服务</description>
        </property>
        <property>
            <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
            <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <property>
            <name>yarn.resourcemanager.resource-tracker.address</name>
            <value>0.0.0.0:8031</value>
        </property>
        <property>
            <name>yarn.resourcemanager.address</name>
            <value>0.0.0.0:8032</value>
        </property>
        <property>
            <name>yarn.resourcemanager.scheduler.address</name>
            <value>0.0.0.0:8030</value>
        </property>
        <property>
            <name>yarn.resourcemanager.admin.address</name>
            <value>0.0.0.0:8033</value>
        </property>
        <property>
            <name>yarn.resourcemanager.webapp.address</name>
            <value>0.0.0.0:18088</value>
        </property>
        <property>  
            <name>yarn.log-aggregation-enable</name>  
            <value>true</value>  
        </property>
        <property>
            <name>yarn.nodemanager.env-whitelist</name>
            <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ</value>
            <description>容器可能会覆盖的环境变量,而不是使用NodeManager的默认值</description>
        </property>
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
            <description>关闭内存检测,虚拟机需要,虚拟环境不设置为false会报错</description>
        </property>
    </configuration>

    我机器上influxdb占用了8088端口,这里换成18088

    vim /opt/hadoop/sbin/start-dfs.sh
    头部加上

    HDFS_DATANODE_USER=root
    HDFS_DATANODE_SECURE_USER=hdfs
    HDFS_NAMENODE_USER=root
    HDFS_SECONDARYNAMENODE_USER=root

    vim /opt/hadoop/sbin/stop-dfs.sh
    头部加上

    HDFS_DATANODE_USER=root
    HDFS_DATANODE_SECURE_USER=hdfs
    HDFS_NAMENODE_USER=root
    HDFS_SECONDARYNAMENODE_USER=root

    vim /opt/hadoop/sbin/start-yarn.sh
    头部加上

    YARN_RESOURCEMANAGER_USER=root
    HDFS_DATANODE_SECURE_USER=yarn
    YARN_NODEMANAGER_USER=root

    vim /opt/hadoop/sbin/stop-yarn.sh
    头部加上

    YARN_RESOURCEMANAGER_USER=root
    HDFS_DATANODE_SECURE_USER=yarn
    YARN_NODEMANAGER_USER=root

    vim /opt/hadoop/etc/hadoop/hadoop-env.sh

    export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
    export HDFS_NAMENODE_USER=root
    export HDFS_DATANODE_USER=root
    export HDFS_SECONDARYNAMENODE_USER=root
    export HADOOP_PID_DIR=/var/run

    修改jdk路径,不能有空格

    pid_dir影响 NameNode DataNode SecondaryNameNode 的进程pid存储  

    vim /opt/hadoop/etc/hadoop/mapred-env.sh

    export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
    export HADOOP_MAPRED_PID_DIR=/var/run

    pid_dir影响 JobHistoryServer 的进程pid存储

    vim /opt/hadoop/etc/hadoop/yarn-env.sh

    export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
    export HADOOP_PID_DIR=/var/run

    pid_dir影响 NodeManager ResourceManager 的进程pid存储

    检查ssh免密登录
    ssh localhost

    配置ssh免密登录
    [root@localhost ~]# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
    [root@localhost ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    [root@localhost ~]# chmod 0600 ~/.ssh/authorized_keys

    再次检查
    ssh localhost

    //查看版本
    /opt/hadoop/bin/hadoop version

    格式化 namenode
    /opt/hadoop/bin/hdfs namenode -format
    成功会看到 “successfully formatted” 和 “Exitting with status 0”,若为 “Exitting with status 1” 则是出错。

    启动

    /opt/hadoop/sbin/start-dfs.sh
    /opt/hadoop/sbin/stop-dfs.sh

    /opt/hadoop/sbin/start-all.sh
    查看服务 jps

    验证
    /opt/hadoop/bin/hadoop fs -ls /

    curl -X GET http://localhost:9870
    curl -X GET http://localhost:18088

  • 相关阅读:
    MVC中CheckBox
    Python中的高级数据结构
    高级正则表达式技术(Python版)
    程序员可以兼任项目经理吗?
    浅谈五大Python Web框架
    学习Python编程的11个资源
    Python 代码性能优化技巧
    python多线程ctrl-c退出问题
    Python 笔记 : 类和继承
    Python的OO思想
  • 原文地址:https://www.cnblogs.com/wintersoft/p/11056759.html
Copyright © 2020-2023  润新知