• BG.Hadoop.Master


    1. 安装JDK

      JDK安装包复制到/opt文件夹

      cd /opt

      rpm -ivh jdk-8u121-linux-x64.rpm

      vim /etc/profile

      增加 JAVA_HOME=/usr/java/default

      PATH=$JAVA_HOME/bin:$PATH

      source /etc/profile

      验证 echo $JAVA_HOME

      java -version

    2. 配置SSH(免密码登录),Hadoop集群间登录使用。

      a> 在客户端进行如下操作:

        ssh-keygen -t rsa  生成密钥

        cat id_rsa.pub >> authorized_keys  客户端注册公钥

        cat id_rsa.pub | ssh root@bigdata.mysql "cat - >> ~/.ssh/authorized_keys"  注册公钥到服务器

      b> 在服务器端进行:

        chmod 700 -R .ssh

        chmod 600 .ssh/authorized_keys

      或者:在客户端直接 ssh-copy-id -i ~/.ssh/id_rsa.pub root@bigdata.mysql

      c> 测试

        在客户端:ssh bigdata.mysql

    3. Hadoop集群搭建-Master

      tar zxf hadoop-2.7.3.tar.gz

      vim /etc/profile

      增加 HADOOP_HOME=/opt/hadoop-2.7.3

      在PATH前增加$HADOOP_HOME/bin:段，即 PATH=$HADOOP_HOME/bin:$PATH

      source /etc/profile

      检查 echo $HADOOP_HOME

      cd /opt/hadoop-2.7.3/etc/hadoop/

      配置:core-site.xml; hdfs-site.xml; yarn-site.xml; mapred-site.xml; slaves

      core-site.xml

    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://bigdata.hadoop.master:9000</value>
      </property>
    
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop-2.7.3/current/tmp</value>
      </property>
    
      <property>
        <name>fs.trash.interval</name>
        <value>4320</value>
      </property>

      mkdir -p /opt/hadoop-2.7.3/current/tmp

      hdfs-site.xml

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop-2.7.3/current/name</value>
      </property>
    
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop-2.7.3/current/data</value>
      </property>
    
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
    
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
    
      <property>
        <name>dfs.permissions.superusergroup</name>
        <value>staff</value>
      </property>
    
      <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
      </property>

      mkdir -p /opt/hadoop-2.7.3/current/name

      mkdir -p /opt/hadoop-2.7.3/current/data

      yarn-site.xml

    <property> 
        <name>yarn.resourcemanager.hostname</name>
        <value>bigdata.hadoop.master</value>
      </property>
    
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
    
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
    
      <property>
        <name>yarn.resourcemanager.address</name>
        <value>bigdata.hadoop.master:18040</value>
      </property>
    
      <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>bigdata.hadoop.master:18030</value>
      </property>
    
      <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>bigdata.hadoop.master:18025</value>
      </property>
    
      <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>bigdata.hadoop.master:18141</value>
      </property>
    
      <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>bigdata.hadoop.master:18088</value>
      </property>
    
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
      </property>
    
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
      </property>
    
      <property>
        <name>yarn.log-aggregation.retain-check-interval-seconds</name>
        <value>86400</value>
      </property>
    
      <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/tmp/logs</value>
      </property>
    
      <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
      </property>

      mapred-site.xml

      cp mapred-site.xml.template mapred-site.xml

      vim mapred-site.xml 

     <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
    
      <property>
        <name>mapreduce.jobtracker.http.address</name>
        <value>bigdata.hadoop.master:50030</value>
      </property>
    
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>bigdata.hadoop.master:10020</value>
      </property>
    
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>bigdata.hadoop.master:19888</value>
      </property>
    
      <property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/jobhistory/done</value>
      </property>
    
      <property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/jobhistory/done_intermediate</value>
      </property>
    
      <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
      </property>

      slaves

      bigdata.hadoop.master

      bigdata.hadoop.slave1

      bigdata.hadoop.slave2

      bigdata.hadoop.slave3

      vim /opt/hadoop-2.7.3/etc/hadoop/hadoop-env.sh

      export JAVA_HOME=/usr/java/default

    4.  创建Slaves虚拟机

       bigdata.hadoop.slave1

      bigdata.hadoop.slave2

      bigdata.hadoop.slave3

    5. SSH免密码登录

      在bigdata.hadoop.master上执行:

      ssh-keygen -t rsa  --生成密钥公钥,一路回车

      ssh-copy-id -i ~/.ssh/id_rsa.pub root@bigdata.hadoop.slave1

      ssh-copy-id -i ~/.ssh/id_rsa.pub root@bigdata.hadoop.slave2

      ssh-copy-id -i ~/.ssh/id_rsa.pub root@bigdata.hadoop.slave3

      cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys  --将本机公钥也注册到自身，使master可免密登录自己

      测试免密码登录

      ssh bigdata.hadoop.slave1, ssh bigdata.hadoop.slave2, ssh bigdata.hadoop.slave3, ssh bigdata.hadoop.master

    6. 关闭防火墙和SELinux

      service iptables stop  --临时关闭防火墙

      chkconfig iptables off  --永久关闭

      chkconfig --list|grep iptables

      vi /etc/sysconfig/selinux

      SELINUX=disabled

      setenforce 0

      getenforce

    7. Copy文件到虚拟机

      安装scp命令:yum install openssh-clients

      scp -r /opt/hadoop-2.7.3 root@bigdata.hadoop.slave1:/opt/

      配置Slaves的etc/profile

    8. 格式化Hadoop

      在Master上执行: hdfs namenode -format

    9. 启动Hadoop

      在Master上执行:/opt/hadoop-2.7.3/sbin/start-all.sh

    10. 查看Hadoop状态

      在Master执行:jps

        NodeManager;  NameNode;  SecondaryNameNode;  ResourceManager;  DataNode

      在Slaves上执行:jps

        NodeManager;  DataNode;

      在外部访问:http://bigdata.hadoop.master:50070 和 http://bigdata.hadoop.master:18088

      打开ServiceHistoryServer

      /opt/hadoop-2.7.3/sbin/mr-jobhistory-daemon.sh start historyserver

      在外部访问:http://bigdata.hadoop.master:19888

    11. 执行Map-Reduce任务

       词频统计

      1. 将1个txt文本上传到hdfs

        hdfs dfs -put testfile.txt /task1/input/testfile

      2. 执行Map-Reduce任务

        hadoop jar /opt/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /task1/input/testfile /task1/output

      3. 查看输出结果

        hdfs dfs -cat /task1/output/part-r-00000

      

      

  • 相关阅读:
    在C#中对枚举进行位运算--枚举组合
    详解C#泛型(一)
    详解C#委托和事件(二)
    详解C#特性和反射(四)
    详解C#特性和反射(三)
    详解C#特性和反射(二)
    openssl 证书cert与key合并pfx
    GRPC Oauth IdentityServer4
    GRPC Oauth Identity
    PG SQL funcation
  • 原文地址:https://www.cnblogs.com/Niko12230/p/6479865.html
Copyright © 2020-2023  润新知