Hadoop 2.6安装文档
版本说明:hadoop 2.6 linux-64位
1、 SSH无密码登录
ssh-keygen
vim authorized_keys
把每台机子上的id_rsa.pub内容拷贝到authorized_keys
用scp把authorized_keys拷贝到其它机器上
2、 jdk安装
解压目录
jdk环境变量
3、 Zookeeper安装
4、 必要目录创建
NameNode数据目录 /data/nn
DataNode数据目录 /data/dn
JournalNode数据目录 /data/jn
Yarn数据目录 /data/yarn/local
5、 修改配置文件
core-site.xml
hdfs-site.xml
slaves
yarn-site.xml
注:修改的参数,详见附件
6、 配置环境变量
export JAVA_HOME=/usr/local/jdk1.7.0_75
export JRE_HOME=/usr/local/jdk1.7.0_75/jre
export PATH=$PATH:/usr/local/jdk1.7.0_75/bin
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.6
export PATH=$ZOOKEEPER_HOME/bin:$PATH
export HADOOP_HOME=/usr/local/hadoop-2.6.0
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
7、 启动集群
启动zookeeper
zkServer.sh start
格式化zookeeper
bin/hdfs zkfc -formatZK
启动journalnode
sbin/hadoop-daemon.sh start journalnode
格式化Namenode
bin/hdfs namenode -format
启动格式化的namenode
sbin/hadoop-daemon.sh start namenode
同步namenode
bin/hdfs namenode -bootstrapStandby
启动同步的namenode
sbin/hadoop-daemon.sh start namenode
启动datanode
hdfs datanode >/dev/null 2>&1 &
启动resourcemanager
yarn resourcemanager >/dev/null 2>&1 &
启动nodemanager
yarn nodemanager >/dev/null 2>&1 &
启动zkfc
hdfs zkfc >/dev/null 2>&1 &
附件
core-site.xml
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://mycluster</value> </property> <property> <name>ha.zookeeper.quorum</name> <value>database:2181,spark02:2181,spark03:2181,spark04:2181,spark05:2181</value> </property> </configuration>
slaves
spark03
spark04
spark05
yarn-site.xml
<configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.log-aggregation-enable</name> <value>true</value> </property> <property> <description>List of directories to store localized files in.</description> <name>yarn.nodemanager.local-dirs</name> <value>file:///data/yarn/local</value> </property> <property> <description>Classpath for typical applications.</description> <name>yarn.application.classpath</name> <value> $HADOOP_CONF_DIR, $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*, $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*, $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*, $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/* </value> </property> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>rm1,rm2</value> </property> <property> <name>yarn.resourcemanager.hostname.rm1</name> <value>database</value> </property> <property> <name>yarn.resourcemanager.hostname.rm2</name> <value>spark02</value> </property> <property> <name>yarn.resourcemanager.recovery.enabled</name> <value>true</value> </property> <property> <name>yarn.resourcemanager.store.class</name> <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value> </property> <property> <name>yarn.resourcemanager.zk-address</name> <value>database:2181,spark02:2181,spark03:2181,spark04:2181,spark05:2181</value> <description>For multiple zk services, separate them with comma</description> </property> <property> <name>yarn.resourcemanager.cluster-id</name> <value>yarn-ha</value> </property> </configuration>
hdfs-site.xml
<configuration> <property> <name>dfs.permissions.superusergroup</name> <value>hadoop</value> </property> <property> <name>dfs.namenode.name.dir</name> <value>file:///data/nn</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:///data/dn</value> </property> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property> <property> <name>dfs.webhdfs.user.provider.user.pattern</name> <value>^[A-Za-z0-9_][A-Za-z0-9._-]*[$]?$</value> </property> <property> <name>dfs.nameservices</name> <value>mycluster</value> </property> <property> <name>dfs.ha.namenodes.mycluster</name> <value>nn1,nn2</value> </property> <property> <name>dfs.namenode.rpc-address.mycluster.nn1</name> <value>database:8020</value> </property> <property> <name>dfs.namenode.rpc-address.mycluster.nn2</name> <value>spark02:8020</value> </property> <property> <name>dfs.namenode.http-address.mycluster.nn1</name> <value>database:50070</value> </property> <property> <name>dfs.namenode.http-address.mycluster.nn2</name> <value>spark02:50070</value> </property> <property> <name>dfs.namenode.shared.edits.dir</name> <value>qjournal://database:8485;spark02:8485;spark03:8485/mycluster</value> </property> <property> <name>dfs.journalnode.edits.dir</name> <value>/data/jn</value> </property> <property> <name>dfs.client.failover.proxy.provider.mycluster</name> <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> </property> <property> <name>dfs.ha.fencing.methods</name> <value>sshfence</value> </property> <property> <name>dfs.ha.fencing.ssh.private-key-files</name> <value>/root/.ssh/id_rsa</value> </property> <property> <name>dfs.ha.automatic-failover.enabled</name> <value>true</value> </property> <property> <name>dfs.client.read.shortcircuit</name> <value>true</value> </property> <property> <name>dfs.domain.socket.path</name> <value>${hadoop.tmp.dir}/sockets/dn._PORT</value> </property> <property> 
<name>dfs.client.read.shortcircuit.streams.cache.size</name> <value>1000</value> </property> <property> <name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name> <value>10000</value> </property> <property> <name>dfs.datanode.hdfs-blocks-metadata.enabled</name> <value>true</value> </property> </configuration>