1 Standalone模式
默认模式:Hadoop的默认配置即为standalone(单机)模式,无需额外配置
安装hadoop-client时,会自动安装其依赖的各个包
yum install hadoop-client
配置文件
各个配置文件在/etc/hadoop/conf下
特别注意:Standalone模式下,HDFS是用本地文件系统来模拟的
验证(用自带的grep的mr示例):
mkdir input
cp /etc/hadoop/conf/*.xml input
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.12.1.jar grep input output 'dfs[a-z.]+'
cat output/*
---
[root@hadoop1 ~]# yum install hadoop-client [root@hadoop1 ~]# cd /usr/lib/ [root@hadoop1 lib]# ll drwxr-xr-x 10 root root 4096 May 13 10:37 hadoop drwxr-xr-x 10 root root 4096 May 13 10:37 hadoop-0.20-mapreduce drwxr-xr-x 7 root root 4096 May 13 10:37 hadoop-hdfs drwxr-xr-x 6 root root 12288 May 13 10:37 hadoop-mapreduce drwxr-xr-x 7 root root 4096 May 13 10:37 hadoop-yarn [root@hadoop1 lib]# cd /etc/hadoop [root@hadoop1 hadoop]# ll total 8 lrwxrwxrwx 1 root root 29 May 13 10:37 conf -> /etc/alternatives/hadoop-conf lrwxrwxrwx 1 root root 10 May 13 10:37 conf.dist -> conf.empty drwxr-xr-x 2 root root 4096 May 13 10:37 conf.empty drwxr-xr-x 2 root root 4096 May 13 10:37 conf.impala [root@hadoop1 hadoop]# update-alternatives --display hadoop-conf hadoop-conf - status is auto. link currently points to /etc/hadoop/conf.empty /etc/hadoop/conf.empty - priority 10 /etc/hadoop/conf.impala - priority 5 Current `best' version is /etc/hadoop/conf.empty. [root@hadoop1 hadoop]# cd conf.empty/ [root@hadoop1 conf.empty]# ll total 104 -rw-r--r-- 1 root root 4436 Nov 22 14:09 capacity-scheduler.xml -rw-r--r-- 1 root root 1335 Nov 22 14:09 configuration.xsl -rw-r--r-- 1 root root 318 Nov 22 14:09 container-executor.cfg -rw-r--r-- 1 root root 904 Nov 22 14:09 core-site.xml -rw-r--r-- 1 root root 3032 Nov 22 14:09 fair-scheduler.xml -rw-r--r-- 1 root root 2598 Nov 22 14:09 hadoop-metrics2.properties -rw-r--r-- 1 root root 2490 Nov 22 14:09 hadoop-metrics.properties -rw-r--r-- 1 root root 9683 Nov 22 14:09 hadoop-policy.xml -rw-r--r-- 1 root root 1039 Nov 22 14:09 hdfs-site.xml -rw-r--r-- 1 root root 12601 Nov 22 14:09 log4j.properties -rw-r--r-- 1 root root 4113 Nov 22 14:09 mapred-queues.xml.template -rw-r--r-- 1 root root 904 Nov 22 14:09 mapred-site.xml -rw-r--r-- 1 root root 758 Nov 22 14:09 mapred-site.xml.template -rw-r--r-- 1 root root 10 Nov 22 14:09 slaves -rw-r--r-- 1 root root 2316 Nov 22 14:09 ssl-client.xml.example -rw-r--r-- 1 root root 2697 Nov 22 14:09 
ssl-server.xml.example -rw-r--r-- 1 root root 4567 Nov 22 14:09 yarn-env.sh -rw-r--r-- 1 root root 2296 Nov 22 14:09 yarn-site.xml [root@hadoop1 tmp]# mkdir input [root@hadoop1 tmp]# cd input/ [root@hadoop1 input]# cp /etc/hadoop/conf/*.xml . [root@hadoop1 input]# cd .. [root@hadoop1 tmp]# hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep input output "dfs[a-z.]+" 19/05/17 10:53:20 INFO mapred.Task: Task attempt_local1615788708_0002_r_000000_0 is allowed to commit now 19/05/17 10:53:20 INFO output.FileOutputCommitter: Saved output of task 'attempt_local1615788708_0002_r_000000_0' to file:/tmp/output/_temporary/0/task_local1615788708_0002_r_000000 19/05/17 10:53:20 INFO mapred.LocalJobRunner: reduce > reduce 19/05/17 10:53:20 INFO mapred.Task: Task 'attempt_local1615788708_0002_r_000000_0' done. 19/05/17 10:53:20 INFO mapred.LocalJobRunner: Finishing task: attempt_local1615788708_0002_r_000000_0 19/05/17 10:53:20 INFO mapred.LocalJobRunner: reduce task executor complete. 
19/05/17 10:53:21 INFO mapreduce.Job: Job job_local1615788708_0002 running in uber mode : false 19/05/17 10:53:21 INFO mapreduce.Job: map 100% reduce 100% 19/05/17 10:53:21 INFO mapreduce.Job: Job job_local1615788708_0002 completed successfully 19/05/17 10:53:21 INFO mapreduce.Job: Counters: 30 File System Counters FILE: Number of bytes read=55192 FILE: Number of bytes written=1338184 FILE: Number of read operations=0 FILE: Number of large read operations=0 FILE: Number of write operations=0 Map-Reduce Framework Map input records=2 Map output records=2 Map output bytes=47 Map output materialized bytes=57 Input split bytes=108 Combine input records=0 Combine output records=0 Reduce input groups=1 Reduce shuffle bytes=57 Reduce input records=2 Reduce output records=2 Spilled Records=4 Shuffled Maps =1 Failed Shuffles=0 Merged Map outputs=1 GC time elapsed (ms)=38 Total committed heap usage (bytes)=270680064 Shuffle Errors BAD_ID=0 CONNECTION=0 IO_ERROR=0 WRONG_LENGTH=0 WRONG_MAP=0 WRONG_REDUCE=0 File Input Format Counters Bytes Read=161 File Output Format Counters Bytes Written=47 [root@hadoop1 tmp]# ll -l total 88 drwxr-xr-x 3 root root 4096 May 17 10:52 hadoop-root drwxr-xr-x 2 root root 4096 May 17 10:53 hsperfdata_root drwxr-xr-x 2 root root 4096 May 17 10:50 input drwx------. 2 root root 4096 Apr 15 17:51 keyring-sO07mP drwx------ 2 gdm gdm 4096 Apr 26 17:12 orbit-gdm drwxr-xr-x 2 root root 4096 May 17 10:53 output drwx------. 2 root root 4096 Apr 15 17:51 pulse-zf8TjJesGYwd drwx------ 2 gdm gdm 4096 Apr 26 17:12 pulse-zSmklD09U96Z -rw------- 1 root root 55359 May 13 10:34 yum_save_tx-2019-05-13-10-34HPxvyy.yumtx [root@hadoop1 tmp]# cd output/ [root@hadoop1 output]# ll total 4 -rw-r--r-- 1 root root 35 May 17 10:53 part-r-00000 -rw-r--r-- 1 root root 0 May 17 10:53 _SUCCESS [root@hadoop1 output]# cat part-r-00000 1 dfsadmin 1 dfs.namenode.name.dir
2 伪分布式环境
各个角色对应的rpm
HDFS NameNode: yum install hadoop-hdfs-namenode
HDFS SecondaryNameNode: yum install hadoop-hdfs-secondarynamenode
HDFS DataNode: yum install hadoop-hdfs-datanode
YARN ResourceManager: yum install hadoop-yarn-resourcemanager
YARN NodeManager: yum install hadoop-yarn-nodemanager
MapReduce: yum install hadoop-mapreduce
配置文件
各个配置文件在/etc/hadoop/conf下
特别注意:由于单节点,HDFS block replica要设为1(默认3)
配置文件
core-site.xml: 整个集群最基础的配置文件
hdfs-site.xml: hdfs的配置文件
mapred-site.xml: MapReduce的配置文件
yarn-site.xml: yarn的配置文件
https://hadoop.apache.org/docs/r2.7.7/hadoop-project-dist/hadoop-common/SingleCluster.html Pseudo-Distributed Operation [root@hadoop1 ~]# yum install hadoop-hdfs-namenode yum install hadoop-hdfs-secondarynamenode yum install hadoop-hdfs-datanode yum install hadoop-yarn-resourcemanager yum install hadoop-yarn-nodemanager yum install hadoop-mapreduce [root@hadoop1 ~]# service hadoop-hdfs-namenode status Hadoop namenode is not running [FAILED] [root@hadoop1 ~]# ll /etc/init init/ init.conf init.d/ inittab [root@hadoop1 ~]# ll /etc/init.d/hadoop-* -rwxr-xr-x 1 root root 4617 Nov 22 14:10 /etc/init.d/hadoop-hdfs-datanode -rwxr-xr-x 1 root root 5381 Nov 22 14:10 /etc/init.d/hadoop-hdfs-namenode -rwxr-xr-x 1 root root 4468 Nov 22 14:10 /etc/init.d/hadoop-hdfs-secondarynamenode -rwxr-xr-x 1 root root 4487 Nov 22 14:10 /etc/init.d/hadoop-yarn-nodemanager -rwxr-xr-x 1 root root 4447 Nov 22 14:10 /etc/init.d/hadoop-yarn-resourcemanager [root@hadoop1 hadoop]# cp -R conf.empty conf.pseudo [root@hadoop1 hadoop]# cd conf.pseudo/ [root@hadoop1 conf.pseudo]# vim core-site.xml [root@hadoop1 conf.pseudo]# vim hdfs-site.xml [root@hadoop1 conf.pseudo]# vim mapred-site.xml [root@hadoop1 conf.pseudo]# vim yarn-site.xml --core-site.xml <configuration> <property> <name>fs.defaultFS</name> <value>hdfs://localhost:9000</value> </property> </configuration> --hdfs-site.xml <configuration> <property> <name>dfs.replication</name> <value>1</value> </property> </configuration> --mapred-site.xml <configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration> --yarn-site.xml <configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> </configuration> [root@hadoop1 hadoop]# update-alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.pseudo 50 [root@hadoop1 hadoop]# update-alternatives --display hadoop-conf hadoop-conf - status is auto. 
link currently points to /etc/hadoop/conf.pseudo /etc/hadoop/conf.empty - priority 10 /etc/hadoop/conf.impala - priority 5 /etc/hadoop/conf.pseudo - priority 50 Current `best' version is /etc/hadoop/conf.pseudo. ##50 优先级 [root@hadoop1 hadoop]# ll /etc/hadoop/conf lrwxrwxrwx 1 root root 29 May 17 11:18 /etc/hadoop/conf -> /etc/alternatives/hadoop-conf [root@hadoop1 hadoop]# ll /etc/alternatives/hadoop-conf/
初始化HDFS
NameNode要format才能用
sudo -u hdfs hdfs namenode -format
(注意:format需以hdfs用户执行,否则会有权限问题;hadoop namenode -format写法已过时,推荐用hdfs namenode -format)
启动服务
service hadoop-hdfs-namenode start
service hadoop-hdfs-datanode start
service hadoop-yarn-resourcemanager start
service hadoop-yarn-nodemanager start
验证服务
WebUI:
NameNode: http://{hostname}:50070
ResourceManager: http://{hostname}:8088
命令行操作:
HDFS:sudo -u hdfs hadoop fs -mkdir -p /user/{username}(需以hdfs用户执行;root用户直接执行会报Permission denied,见下方实录)
MapReduce: hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar
[root@hadoop1 hadoop]# hadoop namenode -h
[root@hadoop1 hadoop]# su hdfs bash-4.1$ id uid=494(hdfs) gid=491(hdfs) groups=491(hdfs),501(hadoop) bash-4.1$ hadoop namenode -format 19/05/17 11:24:40 INFO namenode.FSImageFormatProtobuf: Image file /tmp/hadoop-hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 320 bytes saved in 0 seconds . 19/05/17 11:24:40 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0 19/05/17 11:24:40 INFO util.ExitUtil: Exiting with status 0 19/05/17 11:24:40 INFO namenode.NameNode: SHUTDOWN_MSG: /************************************************************ SHUTDOWN_MSG: Shutting down NameNode at hadoop1/192.168.19.69 ************************************************************/ bash-4.1$ cd /tmp/hadoop-hdfs/ bash-4.1$ ls -l total 4 drwxr-xr-x 3 hdfs hdfs 4096 May 17 11:24 dfs bash-4.1$ cd dfs/name/current/ bash-4.1$ ls -l total 16 -rw-r--r-- 1 hdfs hdfs 320 May 17 11:24 fsimage_0000000000000000000 -rw-r--r-- 1 hdfs hdfs 62 May 17 11:24 fsimage_0000000000000000000.md5 -rw-r--r-- 1 hdfs hdfs 2 May 17 11:24 seen_txid -rw-r--r-- 1 hdfs hdfs 201 May 17 11:24 VERSION 启动报错 [hadoop@hadoop1 ~]$ sudo service hadoop-hdfs-namenode restart Error: JAVA_HOME is not set and could not be found. Failed to stop Hadoop namenode. Return value: 1. [FAILED] Error: JAVA_HOME is not set and could not be found. Failed to start Hadoop namenode. 
Return value: 3 [FAILED] [root@hadoop1 hadoop]# java -version java version "1.8.0_191" Java(TM) SE Runtime Environment (build 1.8.0_191-b12) Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode) [hadoop@hadoop1 jdk]$ export declare -x HOSTNAME="hadoop1" declare -x JAVA_HOME="/opt/jdk" declare -x LANG="en_US.UTF-8" [root@hadoop1 conf.pseudo]# source hdoop-env.sh [root@hadoop1 conf.pseudo]# vim /etc/default/hadoop export JAVA_HOME=/opt/jdk [root@hadoop1 conf.pseudo]# source /etc/default/hadoop [root@hadoop1 conf.pseudo]# service hadoop-hdfs-namenode restart no namenode to stop Stopped Hadoop namenode: [ OK ] starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.out Started Hadoop namenode: [ OK ] 日志 [root@hadoop1 conf.pseudo]# tail -n 20 /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.log 2019-05-17 14:19:40,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Rescanning after 30000 milliseconds 2019-05-17 14:19:40,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Scanned 0 directive(s) and 0 block(s) in 0 millisecond(s). 
2019-05-17 14:20:10,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Rescanning after 30001 milliseconds [root@hadoop1 conf.pseudo]# jps 16742 NameNode 17047 Jps 分别启动其他服务 [root@hadoop1 conf.pseudo]# service hadoop-hdfs-datanode start starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-hadoop1.out Started Hadoop datanode (hadoop-hdfs-datanode): [ OK ] [root@hadoop1 conf.pseudo]# service hadoop-yarn-resourcemanager start starting resourcemanager, logging to /var/log/hadoop-yarn/yarn-yarn-resourcemanager-hadoop1.out Started Hadoop resourcemanager: [ OK ] [root@hadoop1 conf.pseudo]# service hadoop-yarn-nodemanager start starting nodemanager, logging to /var/log/hadoop-yarn/yarn-yarn-nodemanager-hadoop1.out Started Hadoop nodemanager: [ OK ] [root@hadoop1 conf.pseudo]# [root@hadoop1 ~]# [root@hadoop1 ~]# jps 17458 NodeManager 16742 NameNode 17192 ResourceManager 17081 DataNode 17583 Jps web访问 http://*:50070/dfshealth.html#tab-overview [root@hadoop1 ~]# w3m http://*:50070 [root@hadoop1 ~]# w3m http://*:8088 [root@hadoop1 ~]# sudo -u hdfs hadoop fs -ls / Error: JAVA_HOME is not set and could not be found. 
[root@hadoop1 ~]# cat >> /etc/default/hadoop-hdfs-datanode << EOF export JAVA_HOME=/opt/jdk EOF [root@hadoop1 ~]# cat >> /etc/default/hadoop-hdfs-namenode << EOF export JAVA_HOME=/opt/jdk EOF cat >> /etc/default/hadoop-0.20-mapreduce << EOF export JAVA_HOME=/opt/jdk EOF cat >> /etc/default/hadoop-hdfs-secondarynamenode << EOF export JAVA_HOME=/opt/jdk EOF cat >> /etc/default/hadoop-yarn-resourcemanager << EOF export JAVA_HOME=/opt/jdk EOF cat >> /etc/default/hadoop-yarn-nodemanager << EOF export JAVA_HOME=/opt/jdk EOF [root@hadoop1 ~]# source /etc/default/hadoop-hdfs-namenode [root@hadoop1 ~]# source /etc/default/hadoop-hdfs-datanode [root@hadoop1 ~]# source /etc/default/hadoop-hdfs-secondarynamenode [root@hadoop1 ~]# source /etc/default/hadoop-yarn-resourcemanager [root@hadoop1 ~]# source /etc/default/hadoop-yarn-nodemanager [root@hadoop1 ~]# source /etc/default/hadoop-0.20-mapreduce [root@hadoop1 ~]# sudo -u hdfs hadoop fs -ls / Error: JAVA_HOME is not set and could not be found. [root@hadoop1 ~]# hadoop fs -ls / [root@hadoop1 ~]# hadoop fs -mkdir -p /user/hadoop mkdir: Permission denied: user=root, access=WRITE, inode="/":hdfs:supergroup:drwxr-xr-x [root@hadoop1 ~]# su hdfs bash-4.1$ hadoop fs -ls / bash-4.1$ hadoop fs -mkdir -p /user/hadoop bash-4.1$ hadoop fs -chown hadoop:hadoop /user/hadoop -bash-4.1$ hadoop fs -put /etc/hadoop/conf/*.xml /user/hadoop/input/ bash-4.1$ hadoop fs -ls /user/hadoop/input Found 7 items -rw-r--r-- 1 hdfs hadoop 4436 2019-05-17 15:03 /user/hadoop/input/capacity-scheduler.xml -rw-r--r-- 1 hdfs hadoop 1010 2019-05-17 15:03 /user/hadoop/input/core-site.xml -rw-r--r-- 1 hdfs hadoop 3032 2019-05-17 15:03 /user/hadoop/input/fair-scheduler.xml -rw-r--r-- 1 hdfs hadoop 9683 2019-05-17 15:03 /user/hadoop/input/hadoop-policy.xml -rw-r--r-- 1 hdfs hadoop 987 2019-05-17 15:03 /user/hadoop/input/hdfs-site.xml -rw-r--r-- 1 hdfs hadoop 1009 2019-05-17 15:03 /user/hadoop/input/mapred-site.xml -rw-r--r-- 1 hdfs hadoop 1015 2019-05-17 15:03 
/user/hadoop/input/yarn-site.xml bash-4.1$ hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep /user/hadoop/input /user/hadoop/output "dfs[a-z.]+" 19/05/17 15:05:44 INFO mapred.Task: Task:attempt_local1573914124_0002_r_000000_0 is done. And is in the process of commiting 19/05/17 15:05:44 INFO mapred.LocalJobRunner: 19/05/17 15:05:44 INFO mapred.Task: Task attempt_local1573914124_0002_r_000000_0 is allowed to commit now 19/05/17 15:05:44 INFO mapred.FileOutputCommitter: Saved output of task 'attempt_local1573914124_0002_r_000000_0' to hdfs://localhost:9000/user/hadoop/output 19/05/17 15:05:44 INFO mapred.LocalJobRunner: reduce > reduce 19/05/17 15:05:44 INFO mapred.Task: Task 'attempt_local1573914124_0002_r_000000_0' done. 19/05/17 15:05:45 INFO mapred.JobClient: map 100% reduce 100% 19/05/17 15:05:45 INFO mapred.JobClient: Job complete: job_local1573914124_0002 19/05/17 15:05:45 INFO mapred.JobClient: Counters: 26 19/05/17 15:05:45 INFO mapred.JobClient: File System Counters 19/05/17 15:05:45 INFO mapred.JobClient: FILE: Number of bytes read=581599 19/05/17 15:05:45 INFO mapred.JobClient: FILE: Number of bytes written=1254598 19/05/17 15:05:45 INFO mapred.JobClient: FILE: Number of read operations=0 19/05/17 15:05:45 INFO mapred.JobClient: FILE: Number of large read operations=0 19/05/17 15:05:45 INFO mapred.JobClient: FILE: Number of write operations=0 19/05/17 15:05:45 INFO mapred.JobClient: HDFS: Number of bytes read=42630 19/05/17 15:05:45 INFO mapred.JobClient: HDFS: Number of bytes written=315 19/05/17 15:05:45 INFO mapred.JobClient: HDFS: Number of read operations=80 19/05/17 15:05:45 INFO mapred.JobClient: HDFS: Number of large read operations=0 19/05/17 15:05:45 INFO mapred.JobClient: HDFS: Number of write operations=20 19/05/17 15:05:45 INFO mapred.JobClient: Map-Reduce Framework 19/05/17 15:05:45 INFO mapred.JobClient: Map input records=2 19/05/17 15:05:45 INFO mapred.JobClient: Map output records=2 19/05/17 15:05:45 INFO 
mapred.JobClient: Map output bytes=41 19/05/17 15:05:45 INFO mapred.JobClient: Input split bytes=115 19/05/17 15:05:45 INFO mapred.JobClient: Combine input records=0 19/05/17 15:05:45 INFO mapred.JobClient: Combine output records=0 19/05/17 15:05:45 INFO mapred.JobClient: Reduce input groups=1 19/05/17 15:05:45 INFO mapred.JobClient: Reduce shuffle bytes=0 19/05/17 15:05:45 INFO mapred.JobClient: Reduce input records=2 19/05/17 15:05:45 INFO mapred.JobClient: Reduce output records=2 19/05/17 15:05:45 INFO mapred.JobClient: Spilled Records=4 19/05/17 15:05:45 INFO mapred.JobClient: CPU time spent (ms)=0 19/05/17 15:05:45 INFO mapred.JobClient: Physical memory (bytes) snapshot=0 19/05/17 15:05:45 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0 19/05/17 15:05:45 INFO mapred.JobClient: Total committed heap usage (bytes)=260194304 19/05/17 15:05:45 INFO mapred.JobClient: org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter 19/05/17 15:05:45 INFO mapred.JobClient: BYTES_READ=57