Preparation
Download Hadoop (hadoop-2.7.5):
http://archive.apache.org/dist/hadoop/core/
Disable the firewall:
# Stop the firewall
systemctl stop firewalld
# Disable firewall autostart on boot
systemctl disable firewalld
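To confirm the firewall is actually off, a quick sanity check (not part of the original steps):

# Should print "inactive" after the stop command above
systemctl is-active firewalld
# Should print "disabled" after the disable command above
systemctl is-enabled firewalld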
Edit the hosts file so the hostname maps to the machine's real IP address (not 127.0.0.1):
# vim /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
10.0.0.19 vsr119
10.1.0.19 sr119
10.1.0.31 sr131
10.0.0.29 vsr129
10.1.0.29 sr129
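A quick way to verify that the hostname used throughout this guide (sr131) resolves to the intended address rather than the loopback:

# Should print 10.1.0.31, not 127.0.0.1
getent hosts sr131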
Install the JDK
# Extract (JDK 1.8 or later)
tar xf /opt/jdk-8u202-linux-x64.tar.gz
# Configure environment variables (as your own user)
$ vim .bashrc
# JAVA_HOME
export JAVA_HOME=/home/jiangchun/jdk1.8
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
# Reload environment variables
source .bashrc
# Verify
$ java -version
java version "1.8.0_161"
Java(TM) SE Runtime Environment (build 1.8.0_161-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode)
Install Hadoop
# Extract
tar xf hadoop-2.7.5.tar.gz
# Configure environment variables
vim .bashrc
export HADOOP_HOME=/home/jiangchun/hadoop-2.7.5
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
# Reload environment variables
source .bashrc
# Verify
$ hadoop version
Hadoop 2.7.5
Subversion https://shv@git-wip-us.apache.org/repos/asf/hadoop.git -r 18065c2b6806ed4aa6a3187d77cbe21bb3dba075
Compiled by kshvachk on 2017-12-16T01:06Z
Compiled with protoc 2.5.0
From source with checksum 9f118f95f47043332d51891e37f736e9
This command was run using /home/jiangchun/hadoop-2.7.5/share/hadoop/common/hadoop-common-2.7.5.jar
Configure Hadoop
1. Configure HDFS
hadoop-env.sh
# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/hadoop-env.sh
# Set the JDK path
# The java implementation to use.
export JAVA_HOME=/home/jiangchun/jdk1.8
core-site.xml
# fs.defaultFS: the default filesystem; the NameNode (NN) will start on this node (sr131)
# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://sr131:9000</value>
    </property>
</configuration>
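Once the file is saved, you can confirm the value Hadoop actually picks up (assuming $HADOOP_HOME/bin is on PATH as configured above):

# Should print hdfs://sr131:9000
hdfs getconf -confKey fs.defaultFS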
hdfs-site.xml
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/jiangchun/hadoop-2.7.5/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/mnt/DP_disk1/tpcds/dfs,/mnt/DP_disk2/tpcds/dfs,/mnt/DP_disk3/tpcds/dfs,/mnt/DP_disk4/tpcds/dfs,/mnt/DP_disk5/tpcds/dfs,/mnt/DP_disk6/tpcds/dfs,/mnt/DP_disk7/tpcds/dfs,/mnt/DP_disk8/tpcds/dfs</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.datanode.socket.write.timeout</name>
        <value>600000</value>
    </property>
    <!--
    <property>
        <name>dfs.socket.timeout</name>
        <value>0</value>
    </property>
    -->
    <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>4096000</value>
    </property>
    <property>
        <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
        <value>1000</value>
    </property>
    <property>
        <name>dfs.datanode.handler.count</name>
        <value>40</value>
    </property>
    <property>
        <name>dfs.client.socket-timeout</name>
        <value>300000</value>
    </property>
    <property>
        <name>dfs.datanode.max.xcievers</name>
        <value>8192</value>
    </property>
</configuration>
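The daemons can usually create these directories themselves, but pre-creating them avoids permission surprises on the freshly mounted disks. A minimal sketch, assuming the /mnt/DP_disk* mount points from the "Mount the disks" step below are already in place:

# NameNode metadata directory
mkdir -p /home/jiangchun/hadoop-2.7.5/dfs/name
# One DataNode data directory per disk
for i in $(seq 1 8); do
    mkdir -p /mnt/DP_disk${i}/tpcds/dfs
done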
slaves
# vim slaves
# Use the hostname registered for this machine in the hosts file
sr131
Mount the disks
# vim mount.sh
mount -t ext4 -o noatime,nodiratime /dev/sdi1 /mnt/DP_disk1
mount -t ext4 -o noatime,nodiratime /dev/sdb1 /mnt/DP_disk2
mount -t ext4 -o noatime,nodiratime /dev/sdc1 /mnt/DP_disk3
mount -t ext4 -o noatime,nodiratime /dev/sdd1 /mnt/DP_disk4
mount -t ext4 -o noatime,nodiratime /dev/sde1 /mnt/DP_disk5
mount -t ext4 -o noatime,nodiratime /dev/sdf1 /mnt/DP_disk6
mount -t ext4 -o noatime,nodiratime /dev/sdg1 /mnt/DP_disk7
mount -t ext4 -o noatime,nodiratime /dev/sdh1 /mnt/DP_disk8
mount -o dax /dev/pmem0 /mnt/pmem0
mount -o dax /dev/pmem1 /mnt/pmem1
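After running the script, a quick check that every data disk is mounted where HDFS and YARN expect it:

# All eight DP_disk mounts and the two pmem mounts should be listed
df -h | grep -E 'DP_disk|pmem'

Note that these mounts are not persistent; to survive a reboot they would need matching entries in /etc/fstab.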
Start HDFS
# Format the NameNode on first use only. If it was formatted before, stop the
# processes and delete the old data directories first, then format again.
hdfs namenode -format
# Start the namenode
hadoop-daemon.sh start namenode
# Start the datanode
hadoop-daemon.sh start datanode
# Verify: list the JVM processes
jps
# 84609 Jps
# 84242 NameNode
# 84471 DataNode
Point a browser at the CentOS machine's IP address plus the port (50070 by default) to reach the HDFS web UI.
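Beyond jps and the web UI, a short smoke test confirms the filesystem actually accepts reads and writes (/tmp/smoke is just an example path):

# Cluster summary: live DataNodes, capacity, etc.
hdfs dfsadmin -report
# Write a file in and read it back
hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put /etc/hosts /tmp/smoke/
hdfs dfs -cat /tmp/smoke/hosts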
2. Configure YARN
yarn-env.sh
# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/yarn-env.sh
# Keep the defaults
yarn-site.xml
# vim yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>sr131</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
        <value>99</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <!--<value>786432</value>-->
        <value>1715472</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <!--<value>120</value>-->
        <value>96</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-vcores</name>
        <value>1</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/mnt/DP_disk1/tpcds/yarn,/mnt/DP_disk2/tpcds/yarn,/mnt/DP_disk3/tpcds/yarn,/mnt/DP_disk4/tpcds/yarn,/mnt/DP_disk5/tpcds/yarn,/mnt/DP_disk6/tpcds/yarn,/mnt/DP_disk7/tpcds/yarn,/mnt/DP_disk8/tpcds/yarn</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.log.retain-seconds</name>
        <value>25920000</value>
    </property>
    <property>
        <name>yarn.log.server.url</name>
        <value>http://sr131:19888/jobhistory/logs/</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>
Start YARN (HDFS must already be running)
# Start the resourcemanager
yarn-daemon.sh start resourcemanager
# Start the nodemanager
yarn-daemon.sh start nodemanager
# List the JVM processes
jps
# 1604 DataNode
# 1877 ResourceManager
# 3223 Jps
# 1468 NameNode
# 2172 NodeManager
Point a browser at the CentOS machine's IP address plus the port (8088 by default) to reach the YARN web UI.
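From the command line, you can also confirm the NodeManager registered with the ResourceManager:

# Should list one RUNNING node (sr131) with the memory/vcores configured above
yarn node -list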
3. Configure MapReduce
mapred-env.sh
# vim mapred-env.sh
# Keep the defaults; if that does not work, set the JDK path
export JAVA_HOME=/home/jiangchun/jdk1.8
mapred-site.xml
# mapred-site.xml does not exist by default; copy the template
cp /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml.template /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
# Edit
vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
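With HDFS, YARN, and MapReduce all configured, an end-to-end check is to run the examples jar bundled with Hadoop 2.7.5 (pi 4 100 means 4 map tasks with 100 samples each):

# Estimate pi on the cluster; the job should also appear in the 8088 web UI
yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar pi 4 100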