# Install the JDK from the downloaded RPM.
rpm -ivh /opt/downloads/jdk-8u201-linux-x64.rpm

# Append the Java environment to /etc/profile (vim /etc/profile), ONE
# export per line -- several `export` statements crammed onto a single
# line are not parsed as separate commands by the shell.
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
export JAVA_BIN=$JAVA_HOME/bin
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
source /etc/profile

# Add the same four export lines at the bottom of /etc/bashrc
# (vim /etc/bashrc) so non-login shells get them too, then reload:
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
export JAVA_BIN=$JAVA_HOME/bin
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
source /etc/bashrc
# Download and unpack Hadoop 3.2.0 into /opt/hadoop.
wget -P /opt/downloads http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.0/hadoop-3.2.0.tar.gz
tar zxvf /opt/downloads/hadoop-3.2.0.tar.gz -C /opt
mv /opt/hadoop-3.2.0/ /opt/hadoop

# Smoke-test with the standalone grep example. Run from /opt/hadoop and
# create the `input` directory first -- the original notes copied into
# `input` without ever creating it or fixing the working directory.
cd /opt/hadoop
mkdir -p input
cp etc/hadoop/*.xml input
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.0.jar grep input output 'dfs[a-z.]+'
# Create the local directories referenced by the XML configs below.
# `-p` creates missing parents and goes BEFORE the operands (trailing
# options after operands only work as a GNU extension).
mkdir -p /opt/hadoop/tmp \
         /opt/hadoop/var \
         /opt/hadoop/dfs/name \
         /opt/hadoop/dfs/data \
         /opt/hadoop/fs/checkpoint/edits \
         /opt/hadoop/jobhistory/done \
         /opt/hadoop/jobhistory/done_intermediate
# Print the JDK install prefix (it is used as JAVA_HOME in the
# hadoop-env/mapred-env/yarn-env files configured below).
printf '%s\n' "$JAVA_HOME"
修改配置
文件系统
vim /opt/hadoop/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <!-- Absolute path of the directory created earlier. Relative values
         such as ../tmp resolve against the daemon's working directory,
         which is not predictable. -->
    <value>/opt/hadoop/tmp</value>
    <description>namenode上本地的hadoop临时文件夹</description>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>262144</value>
    <description>Size of read/write buffer used in SequenceFiles.256k</description>
  </property>
  <property>
    <name>io.native.lib.available</name>
    <value>true</value>
  </property>
  <property>
    <!-- NOTE(review): 0.0.0.0 only works for clients on the same host;
         consider hdfs://localhost:9000 or the machine's hostname. -->
    <name>fs.defaultFS</name>
    <value>hdfs://0.0.0.0:9000</value>
    <description>HDFS的URI,文件系统://namenode标识:端口号</description>
  </property>
</configuration>
name 节点用 fs.defaultFS,不建议使用 fs.default.name。
hadoop.tmp.dir 是hadoop文件系统依赖的基础配置,很多路径都依赖它。如果下面hdfs-site.xml中不配置 namenode 和 datanode 的存放位置,默认就放在如下路径中。
副本
vim /opt/hadoop/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>副本个数,配置默认是3,应小于datanode机器数量</description>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>0.0.0.0:9870</value>
  </property>
  <!-- Storage paths use the absolute directories created earlier; the
       original relative values (../dfs/name etc.) depend on the
       daemon's working directory and can land somewhere unexpected. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/hadoop/dfs/name</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/dfs/data</value>
    <description>Comma separated list of paths on the localfilesystem of a DataNode where it should store its blocks.</description>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
    <description>need not permissions</description>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
    <description>HDFS blocksize of 128MB for large file-systems.used for on-line</description>
  </property>
  <property>
    <name>fs.checkpoint.dir</name>
    <value>/opt/hadoop/fs/checkpoint</value>
  </property>
  <property>
    <!-- Point at the edits subdirectory created earlier; the original
         reused ../checkpoint for both checkpoint settings. -->
    <name>fs.checkpoint.edits.dir</name>
    <value>/opt/hadoop/fs/checkpoint/edits</value>
  </property>
</configuration>
资源调度框架
vim /opt/hadoop/etc/hadoop/mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>设置MapReduce的运行平台为yarn。local表示本地运行,classic表示经典mapreduce框架,yarn表示新的框架。</description>
  </property>
  <!-- NOTE(review): mapred.job.tracker is an MRv1 (JobTracker) key and
       is ignored when mapreduce.framework.name=yarn; it can likely be
       removed. -->
  <property>
    <name>mapred.job.tracker</name>
    <value>0.0.0.0:9001</value>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <!-- Absolute path of the directory created earlier (was ../var). -->
    <value>/opt/hadoop/var</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>0.0.0.0:10020</value>
    <description>MapReduce JobHistory Server IPC host:port</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>0.0.0.0:19888</value>
    <description>MapReduce JobHistory Server Web UI host:port</description>
  </property>
  <!-- NOTE(review): the jobhistory done dirs resolve in the default
       filesystem (HDFS) unless given a file:// prefix -- confirm which
       filesystem the local mkdir above was meant for. -->
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/opt/hadoop/jobhistory/done</value>
    <description>MapReduce作业运行完之后放在哪</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/opt/hadoop/jobhistory/done_intermediate</value>
    <description>MapReduce正在运行中的作业放在哪</description>
  </property>
  <!-- Small-memory settings for a single-node VM: 512 MB containers
       with matching JVM heaps. -->
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx512M</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx512M</value>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx512M</value>
  </property>
</configuration>
vim /opt/hadoop/etc/hadoop/yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>NodeManager上运行的附属服务</description>
  </property>
  <property>
    <!-- Correct key name; the original
         "yarn.nodemanager.auxservices.mapreduce.shuffle.class" is not a
         recognized property. -->
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>0.0.0.0:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>0.0.0.0:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>0.0.0.0:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>0.0.0.0:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <!-- 18088 instead of the default 8088 (influxdb occupies 8088 on
         this host, see the note below). Must match the final curl
         verification; the original 48088 contradicted both. -->
    <value>0.0.0.0:18088</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <!-- Keep the whole list on one line: leading whitespace/newlines
         inside <value> may not be stripped and can corrupt the first
         variable name. -->
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ</value>
    <description>容器可能会覆盖的环境变量,而不是使用NodeManager的默认值</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
    <description>关闭内存检测,虚拟机需要,虚拟环境不设置为false会报错</description>
  </property>
</configuration>
我机器上influxdb占用了8088端口,这里换成18088
vim /opt/hadoop/sbin/start-dfs.sh
头部加上
# Run the HDFS daemons as root when launched via start-dfs.sh.
# NOTE(review): HDFS_DATANODE_SECURE_USER=hdfs makes the script try to
# start a *secure* DataNode as user "hdfs"; without the privileged-port
# (jsvc) setup and an existing "hdfs" account, DataNode startup is
# expected to fail -- confirm this line is really wanted on a root-only
# single-node install.
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim /opt/hadoop/sbin/stop-dfs.sh
头部加上
# The same four variables go at the top of stop-dfs.sh so shutdown runs
# under the same users as startup.
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim /opt/hadoop/sbin/start-yarn.sh
头部加上
# Run the YARN daemons as root when launched via start-yarn.sh.
# NOTE(review): HDFS_DATANODE_SECURE_USER is an HDFS variable and has no
# effect in the YARN start/stop scripts -- presumably copy-pasted from
# the dfs block above; verify whether it can simply be dropped.
YARN_RESOURCEMANAGER_USER=root
HDFS_DATANODE_SECURE_USER=yarn
YARN_NODEMANAGER_USER=root
vim /opt/hadoop/sbin/stop-yarn.sh
头部加上
# Mirror the startup variables in stop-yarn.sh.
YARN_RESOURCEMANAGER_USER=root
HDFS_DATANODE_SECURE_USER=yarn
YARN_NODEMANAGER_USER=root
vim /opt/hadoop/etc/hadoop/hadoop-env.sh
# Add to hadoop-env.sh, ONE export per line -- several `export`
# statements crammed onto a single line are not parsed as separate
# commands. The JAVA_HOME path must not contain spaces.
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
# HADOOP_PID_DIR controls where the NameNode / DataNode /
# SecondaryNameNode pid files are written.
export HADOOP_PID_DIR=/var/run
vim /opt/hadoop/etc/hadoop/mapred-env.sh
# JVM location and pid-file directory for the JobHistoryServer.
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
# NOTE(review): /var/run is typically a tmpfs cleared on reboot -- fine
# for pid files, but confirm stale-pid handling after a reboot.
export HADOOP_MAPRED_PID_DIR=/var/run
pid_dir影响 JobHistoryServer 的进程pid存储
vim /opt/hadoop/etc/hadoop/yarn-env.sh
# JVM location and pid-file directory for ResourceManager/NodeManager.
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
export HADOOP_PID_DIR=/var/run
pid_dir影响 NodeManager ResourceManager 的进程pid存储
检查ssh免密登录
ssh localhost
配置ssh免密登录
# Generate a passphrase-less RSA key and authorize it for root, so the
# Hadoop start/stop scripts can ssh to localhost without a password.
[root@localhost ~]# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[root@localhost ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# authorized_keys must not be group/world writable or sshd ignores it.
[root@localhost ~]# chmod 0600 ~/.ssh/authorized_keys
再次检查
ssh localhost
//查看版本
/opt/hadoop/bin/hadoop version
格式化 namenode
# Format the NameNode metadata directory (dfs.namenode.name.dir).
# Run this exactly once -- reformatting wipes all HDFS metadata.
/opt/hadoop/bin/hdfs namenode -format
成功会看到 “successfully formatted” 和 “Exitting with status 0”,若为 “Exitting with status 1” 则是出错。
启动
/opt/hadoop/sbin/start-dfs.sh
/opt/hadoop/sbin/stop-dfs.sh
# NOTE(review): start-all.sh is a deprecated convenience wrapper in
# Hadoop 3 (it delegates to start-dfs.sh + start-yarn.sh); prefer the
# two explicit scripts.
/opt/hadoop/sbin/start-all.sh
查看服务 jps
# Verify: list the HDFS root, then check the NameNode web UI (9870) and
# the YARN ResourceManager web UI (18088).
/opt/hadoop/bin/hadoop fs -ls /
# Plain GET requests: the original `curl -X Get` sends the literal,
# case-sensitive method "Get", which is not a valid HTTP method; curl
# performs GET by default.
curl http://localhost:9870
curl http://localhost:18088