• kerberos系列之hdfs&yarn认证配置


    大数据安全系列的其它文章

    https://www.cnblogs.com/bainianminguo/p/12548076.html-----------安装kerberos

    https://www.cnblogs.com/bainianminguo/p/12548334.html-----------hadoop的kerberos认证

    https://www.cnblogs.com/bainianminguo/p/12548175.html-----------zookeeper的kerberos认证

    https://www.cnblogs.com/bainianminguo/p/12584732.html-----------hive的kerberos认证

    https://www.cnblogs.com/bainianminguo/p/12584880.html-----------es的search-guard认证

    https://www.cnblogs.com/bainianminguo/p/12639821.html-----------flink的kerberos认证

    https://www.cnblogs.com/bainianminguo/p/12639887.html-----------spark的kerberos认证

    一、安装hadoop

    1、解压安装包重命名安装目录

    [root@cluster2_host1 data]# tar -zxvf hadoop-2.7.1.tar.gz -C /usr/local/
    [root@cluster2_host1 local]# mv hadoop-2.7.1/ hadoop
    

      

    2、设置hadoop的环境变量

    [root@cluster2_host1 bin]# vim /etc/profile

    export HADOOP_HOME=/usr/local/hadoop
    export PATH=$PATH:/usr/local/hadoop/bin
    

      

    3、添加hdfs用户并修改hdfs的属组

       65  groupadd hdfs
       66  useradd hdfs -g hdfs
       67  cat /etc/passwd
       68  chown -R hdfs:hdfs /usr/local/hadoop/
       69  chown -R hdfs:hdfs /usr/local/hadoop/
    

      

    4、修改hdfs配置文件

    vim core-site.xml 

    <configuration>
          <property>
            <name>fs.default.name</name>
            <value>hdfs://cluster2-host1:9000</value>
          </property>
          <property>
            <name>hadoop.tmp.dir</name>
            <value>/data/vdb1/tmp</value>
          </property>
        </configuration>
    

      

    vim mapred-site.xml

    <configuration>
          <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
          </property>
          <property>
            <name>mapreduce.jobhistory.address</name>
            <value>cluster2-host1:10020</value>
          </property>
          <property>
            <name>mapreduce.jobhistory.webapp.address</name>
            <value>cluster2-host1:19888</value>
          </property>
    </configuration>
    

      

     vim hdfs-site.xml

    <configuration>
          <property>
            <name>dfs.replication</name>
            <value>2</value>
          </property>
          <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:/data/vdb1/name</value>
          </property>
          <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:/data/vdb1/data</value>
          </property>
          <property>
            <name>dfs.secondary.http.address</name>
            <value>cluster2-host2:50090</value>
          </property>
        </configuration>
    

      

    vim yarn-site.xml

    <configuration>
        <!-- Site specific YARN configuration properties -->
          <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
          </property>
          <property>
            <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
            <value>org.apache.hadoop.mapred.ShuffleHandler</value>
          </property>
          <property>
            <name>yarn.resourcemanager.address</name>
        <value>cluster2-host1:8032</value>
          </property>
          <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>cluster2-host1:8030</value>
          </property>
          <property>
            <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>cluster2-host1:8031</value>
          </property>
          <property>
            <name>yarn.resourcemanager.admin.address</name>
        <value>cluster2-host1:8033</value>
          </property>
          <property>
            <name>yarn.resourcemanager.webapp.address</name>
        <value>cluster2-host1:8088</value>
          </property>
    </configuration>
    

      

    修改slaves文件

    [root@cluster2-host1 hadoop]# cat slaves 
    cluster2-host1
    cluster2-host3
    cluster2-host2
    

      

    5、创建目录和修改属组

    [root@cluster2_host3 bin]# groupadd hdfs
    [root@cluster2_host3 bin]# useradd hdfs -g hdfs
    [root@cluster2_host3 bin]# mkdir /data/vdb1/tmp
    [root@cluster2_host3 bin]# mkdir /data/vdb1/data
    [root@cluster2_host3 bin]# mkdir /data/vdb1/name
    [root@cluster2_host3 bin]# chown -R hdfs:hdfs /data/vdb1/tmp/
    [root@cluster2_host3 bin]# chown -R hdfs:hdfs /data/vdb1/data
    [root@cluster2_host3 bin]# chown -R hdfs:hdfs /data/vdb1/name
    [root@cluster2_host3 bin]# chown -R hdfs:hdfs /usr/local/hadoop/
    

      

    6、拷贝安装目录到其他节点

    171  scp -r hadoop/ root@cluster2_host2:/usr/local/
      172  scp -r hadoop/ root@cluster2_host3:/usr/local/
    

      

    7、格式化hdfs

    [root@cluster2_host1 local]# hdfs namenode -format
    

      

    8、启动yarn

    [root@cluster2-host1 sbin]# ./start-yarn.sh
    

      

    9、启动hdfs

    [root@cluster2-host1 sbin]# ./start-dfs.sh
    

      

    10、检查进程

    [root@cluster2-host1 data]# jps
    10004 DataNode
    29432 ResourceManager
    8942 Jps
    9263 NameNode
    30095 NodeManager
    

      

    二、hdfs配置kerberos认证

    1、所有节点安装autoconf

    yum install autoconf -y
    

      

    2、所有节点安装gcc

    yum install gcc -y
    

      

    3、安装jsvc

    542  tar -zxvf commons-daemon-1.2.2-src.tar.gz 
    543  cd /data/commons-daemon-1.2.2-src/src/native/unix
    
      554  ./support/buildconf.sh 
      555  ./configure 
      556  make
    

      

    检查是否安装完成

    [root@cluster2-host1 unix]# ./jsvc -help
    Usage: jsvc [-options] class [args...]
    
    Where options include:
    
        -help | --help | -?
            show this help page (implies -nodetach)
        -jvm <JVM name>
            use a specific Java Virtual Machine. Available 
    
    
    ln -s /data/commons-daemon-1.2.2-src/src/native/unix/jsvc /usr/local/bin/jsvc
    

      

    4、修改hadoop-env.sh的配置文件

    vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
    
    export JSVC_HOME=/data/commons-daemon-1.2.2-src/src/native/unix
    
    export HADOOP_SECURE_DN_USER=hdfs
    

      

    分发到其他节点

     

    5、创建hdfs的principal

    kadmin.local:  addprinc hdfs/cluster2-host1
    kadmin.local:  addprinc hdfs/cluster2-host2
    kadmin.local:  addprinc hdfs/cluster2-host3
    kadmin.local:  addprinc http/cluster2-host1
    kadmin.local:  addprinc http/cluster2-host2
    kadmin.local:  addprinc http/cluster2-host3
    
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/hdfs.keytab hdfs/cluster2-host1
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/hdfs.keytab hdfs/cluster2-host2
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/hdfs.keytab hdfs/cluster2-host3
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/http.keytab http/cluster2-host1
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/http.keytab http/cluster2-host2
    kadmin.local:  ktadd -norandkey -k /etc/security/keytab/http.keytab http/cluster2-host3
    

      

    6、分发密钥文件

    [root@cluster2-host1 etc]# scp hdfs.keytab http.keytab root@cluster2-host2:/usr/local/hadoop/etc/
    hdfs.keytab                                                                                                                                                                                                                                 100% 1559     1.5KB/s   00:00    
    http.keytab                                                                                                                                                                                                                                 100% 1559     1.5KB/s   00:00    
    [root@cluster2-host1 etc]# scp hdfs.keytab http.keytab root@cluster2-host3:/usr/local/hadoop/etc/
    hdfs.keytab                                                                                                                                                                                                                                 100% 1559     1.5KB/s   00:00    
    http.keytab 
    

      

    7、修改hdfs的配置文件

     

    修改core-site.xml文件

       <property>
            <name>hadoop.security.authentication</name>
            <value>kerberos</value>
          </property>
          <property>
            <name>hadoop.security.authorization</name>
            <value>true</value>
          </property>
    

      

    修改hdfs-site.xml

    <property>
        <name>dfs.block.access.token.enable</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.kerberos.principal</name>
        <value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    <property>
        <name>dfs.namenode.keytab.file</name>
        <value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
        <name>dfs.namenode.kerberos.internal.spnego.principal</name>
        <value>http/cluster2-host1@HADOOP.COM</value>
    </property>
    <property>
        <name>dfs.namenode.kerberos.internal.spnego.keytab</name>
        <value>/usr/local/hadoop/etc/http.keytab</value>
    </property>
    <property>
        <name>dfs.web.authentication.kerberos.principal</name>
        <value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    <property>
        <name>dfs.web.authentication.kerberos.keytab</name>
        <value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
        <name>dfs.datanode.kerberos.principal</name>
        <value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    <property>
        <name>dfs.datanode.keytab.file</name>
        <value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:1004</value>
    </property>
    <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:1006</value>
    </property>
    

      

     

    如果有secondnamenode,则还需要加下面的配置

     

    <property>
    	<name>dfs.secondary.namenode.keytab.file</name>
    	<value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
    	<name>dfs.secondary.namenode.kerberos.principal</name>
    	<value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    

     

      

    修改yarn-site.xml

    <property>
        <name>yarn.resourcemanager.principal</name>
        <value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    <property>
        <name>yarn.resourcemanager.keytab</name>
        <value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
        <name>yarn.nodemanager.keytab</name>
        <value>/usr/local/hadoop/etc/hdfs.keytab</value>
    </property>
    <property>
        <name>yarn.nodemanager.principal</name>
        <value>hdfs/cluster2-host1@HADOOP.COM</value>
    </property>
    

      

     

     

    分发配置文件到其他节点

    [root@cluster2-host1 hadoop]# scp core-site.xml hdfs-site.xml yarn-site.xml root@cluster2-host2:/usr/local/hadoop/etc/hadoop/
    core-site.xml                                                                                                                                                                                                                               100% 1241     1.2KB/s   00:00    
    hdfs-site.xml                                                                                                                                                                                                                               100% 2544     2.5KB/s   00:00    
    yarn-site.xml                                                                                                                                                                                                                               100% 2383     2.3KB/s   00:00    
    [root@cluster2-host1 hadoop]# scp core-site.xml hdfs-site.xml yarn-site.xml root@cluster2-host3:/usr/local/hadoop/etc/hadoop/
    core-site.xml                                                                                                                                                                                                                               100% 1241     1.2KB/s   00:00    
    hdfs-site.xml                                                                                                                                                                                                                               100% 2544     2.5KB/s   00:00    
    yarn-site.xml 
    

      

    8、启动hdfs

    hdfs用户执行下面的脚本

    start-dfs.sh

    [root@cluster2-host1 sbin]# 
    [root@cluster2-host1 sbin]# jps
    32595 Secur
    30061 Jps
    28174 NameNode
    

      

    root用户执行下面的脚本

     ./start-secure-dns.sh

     

    检查进程,这里需要注意,jps是看不到datanode的进程的

    [root@cluster2-host1 sbin]# ps auxf |grep datanode
    

      

    9、验证

    [root@cluster2-host1 hadoop]# hdfs dfs -ls /
    20/03/03 08:06:40 WARN ipc.Client: Exception encountered while connecting to the server : javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]
    ls: Failed on local exception: java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]; Host Details : local host is: "cluster2-host1/10.87.18.34"; destination host is: "cluster2-host1":9000; 
    [root@cluster2-host1 hadoop]#  kinit -kt /etc/security/keytab/hdfs.keytab hdfs/cluster2-host1
    [root@cluster2-host1 hadoop]# hdfs dfs -ls /
    Found 4 items
    drwxr-xr-x   - root supergroup          0 2020-03-02 06:25 /flink
    drwxr-xr-x   - root supergroup          0 2020-03-02 04:30 /spark_jars
    drwx-wx-wx   - root supergroup          0 2020-03-02 21:12 /tmp
    drwxr-xr-x   - root supergroup          0 2020-03-02 21:11 /user
    

      

    三、配置yarn的kerberos认证

    1、配置yarn-site.xml配置文件

    <property>
        <name>yarn.nodemanager.container-executor.class</name>
      <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
    </property>
    <property>
        <name>yarn.nodemanager.linux-container-executor.group</name>
        <value>hdfs</value>
    </property>
    <property>
        <name>yarn.nodemanager.linux-container-executor.path</name>
        <value>/bin/container-executor</value>
    </property>
    

      

    yarn.nodemanager.linux-container-executor.path指定了container-executor的路径,container-executor是可执行二进制文件,它需要一个配置文件:

    yarn.nodemanager.linux-container-executor.group:指定nodemanager的启动用户所属组

     

    2、确认container-executor路径

    [root@cluster2-host1 bin]# strings container-executor |grep etc
    ../etc/hadoop/container-executor.cfg
    [root@cluster2-host1 bin]# cd /usr/local/hadoop/bin/
    [root@cluster2-host1 bin]# ll
    total 448
    -rwxr-xr-x. 1 hdfs hdfs 160127 Jun 29  2015 container-executor
    -rwxr-xr-x. 1 hdfs hdfs   6488 Jun 29  2015 hadoop
    -rwxr-xr-x. 1 hdfs hdfs   8786 Jun 29  2015 hadoop.cmd
    -rwxr-xr-x. 1 hdfs hdfs  12223 Jun 29  2015 hdfs
    -rwxr-xr-x. 1 hdfs hdfs   7327 Jun 29  2015 hdfs.cmd
    -rwxr-xr-x. 1 hdfs hdfs   5953 Jun 29  2015 mapred
    -rwxr-xr-x. 1 hdfs hdfs   6310 Jun 29  2015 mapred.cmd
    -rwxr-xr-x. 1 hdfs hdfs   1776 Jun 29  2015 rcc
    -rwxr-xr-x. 1 hdfs hdfs 204075 Jun 29  2015 test-container-executor
    -rwxr-xr-x. 1 hdfs hdfs  13308 Jun 29  2015 yarn
    -rwxr-xr-x. 1 hdfs hdfs  11386 Jun 29  2015 yarn.cmd
    

      

    3、创建目录,拷贝可执行文件和配置文件到指定目录

    [root@cluster2-host1 bin]# mkdir -p /hdp/bin
    [root@cluster2-host1 bin]# mkdir -p /hdp/etc/hadoop
    [root@cluster2-host1 bin]# scp /usr/local/hadoop/bin/container-executor /hdp/bin/
    [root@cluster2-host1 bin]# scp /usr/local/hadoop/etc/hadoop/container-executor.cfg /hdp/etc/hadoop/
    

      

    修改配置文件的内容如下

    yarn.nodemanager.linux-container-executor.group=hdfs
    banned.users=mysql
    min.user.id=500
    allowed.system.users=root
    

      

    4、修改可执行文件的属组

    [root@cluster2-host1 hadoop]# ll /hdp/bin/container-executor 
    -rwxr-xr-x. 1 root hdfs 160127 Mar  3 20:12 /hdp/bin/container-executor
    [root@cluster2-host1 hadoop]# ll /hdp/etc/hadoop/
    total 4
    -rw-r--r--. 1 root root 318 Mar  3 20:13 container-executor.cfg
    [root@cluster2-host1 hadoop]# 
    

      

    修改权限

    [root@cluster2-host1 hadoop]# chmod 6050 /hdp/bin/container-executor 
    [root@cluster2-host1 hadoop]# ll /hdp/bin/container-executor 
    ---Sr-s---. 1 root hdfs 160127 Mar  3 20:12 /hdp/bin/container-executor
    

      

    5、做如下检查,如果输出一致,则container-executor配置完成

     

    [root@cluster2-host1 hadoop]# hadoop checknative
    20/03/03 20:29:41 WARN bzip2.Bzip2Factory: Failed to load/initialize native-bzip2 library system-native, will use pure-Java version
    20/03/03 20:29:41 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
    Native library checking:
    hadoop:  true /usr/local/hadoop/lib/native/libhadoop.so.1.0.0
    zlib:    true /lib64/libz.so.1
    snappy:  true /lib64/libsnappy.so.1
    lz4:     true revision:99
    bzip2:   false 
    openssl: false Cannot load libcrypto.so (libcrypto.so: cannot open shared object file: No such file or directory)!
    [root@cluster2-host1 hadoop]# /hdp/bin/container-executor --checksetup
    [root@cluster2-host1 hadoop]# 
    

     

      

     

    6、拷贝hdp目录 到其他节点,需要设置相同的属组和权限

    [root@cluster2-host1 sbin]# scp /hdp/etc/hadoop/container-executor.cfg root@cluster2-host2:/hdp/etc/hadoop/
    container-executor.cfg
    

      

    7、启动yarn

    [root@cluster2-host1 sbin]# ./start-yarn.sh 
    starting yarn daemons
    starting resourcemanager, logging to /usr/local/hadoop/logs/yarn-root-resourcemanager-cluster2-host1.out
    cluster2-host3: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-cluster2-host3.out
    cluster2-host2: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-cluster2-host2.out
    cluster2-host1: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-cluster2-host1.out
    

      

    8、验证yarn on kerberos配置完成,能正常执行即可

    [root@cluster2-host1 hadoop]# ./bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /input /output
    

      

    输出如下

    [root@cluster2-host1 hadoop]# hdfs dfs -ls /output
    Found 2 items
    -rw-r--r--   2 hdfs supergroup          0 2020-03-03 21:40 /output/_SUCCESS
    -rw-r--r--   2 hdfs supergroup     
    

      

     

  • 相关阅读:
    linux添加开机启动项、登陆启动项、定时启动项、关机执行项等的方法
    linux下/etc/rc.d目录的介绍及redhat启动顺序
    Linux开机自动挂载存储的两种方式
    Linux中环境变量文件profile、bashrc、bash_profile之间的区别和联系
    linux命令详解——yum
    linux命令详解——ftp
    Shell脚本之sed详解
    shell awk读取文件中的指定行的指定字段
    MySQL的字符集
    shell脚本中的数组
  • 原文地址:https://www.cnblogs.com/bainianminguo/p/12548334.html
Copyright © 2020-2023  润新知