HDFS Installation
1. Download the Hadoop installation package
Upload hadoop-2.6.0-cdh5.10.0.tar.gz to the app directory.
Extract the archive.
Create a symbolic link.
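The commands below are a minimal sketch of the extract-and-link step, assuming the archive was uploaded to /home/root1/app (the parent directory implied by the JAVA_HOME and HADOOP_HOME paths configured later in hadoop-env.sh); the symlink name hadoop matches HADOOP_HOME.
cd /home/root1/app
# unpack the CDH Hadoop tarball
tar -zxvf hadoop-2.6.0-cdh5.10.0.tar.gz
# point a stable "hadoop" symlink at the versioned directory
ln -s hadoop-2.6.0-cdh5.10.0 hadoop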
2. Modify the configuration files
vi hdfs-site.xml
<configuration>
  <!-- number of block replicas -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- disable HDFS permission checking (default behavior set to false) -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <!-- nameservice ID; its value matches fs.defaultFS. With NameNode HA there are two NameNodes, and mycluster is the single logical entry point exposed to clients -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <!-- the NameNodes under the mycluster nameservice; these are logical names, arbitrary as long as they are unique -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>hadoop01:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>hadoop01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>hadoop02:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>hadoop02:50070</value>
  </property>
  <!-- enable automatic failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- where the NameNode edit log is stored on the JournalNodes -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoop01:8485;hadoop02:8485;hadoop03:8485/mycluster</value>
  </property>
  <!-- class responsible for performing failover when the active NameNode of mycluster fails -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- local directory where each JournalNode stores its data -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/root1/data/journaldata/jn</value>
  </property>
  <!-- fencing method: connect to the active NameNode over ssh and kill the process -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <!-- SSH private key for passwordless login, needed so fencing can log in and kill the process -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/root1/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>10000</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>
</configuration>
vi core-site.xml
<configuration>
  <!-- default filesystem URI -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <!-- Hadoop temporary directory; separate multiple directories with commas -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/root1/data/tmp</value>
  </property>
  <!-- ZooKeeper quorum that manages HDFS failover -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
  </property>
</configuration>
vi slaves
hadoop01
hadoop02
hadoop03
vi hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/home/root1/app/jdk
export HADOOP_HOME=/home/root1/app/hadoop
3. Testing
Start ZooKeeper on all nodes
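A minimal sketch of this step, assuming ZooKeeper is installed under /home/root1/app/zookeeper on each host (the install path is an assumption; the guide only specifies the ZooKeeper hosts and port 2181):
# run on hadoop01, hadoop02 and hadoop03
/home/root1/app/zookeeper/bin/zkServer.sh start
# optionally confirm one leader and two followers
/home/root1/app/zookeeper/bin/zkServer.sh status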
Start the JournalNode on all nodes
/home/root1/app/hadoop/sbin/hadoop-daemon.sh start journalnode
Use jps to verify that the JournalNode process is running
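For example, the check can be scripted as follows (jps prints one line per JVM, so a running JournalNode appears under that name):
jps | grep JournalNode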
Format the NameNode on the nn1 node
bin/hdfs namenode -format
Format ZKFC on the nn1 node
bin/hdfs zkfc -formatZK
Start the NameNode on the nn1 node
bin/hdfs namenode
On the nn2 node, sync the metadata from nn1
bin/hdfs namenode -bootstrapStandby
After nn2 has finished syncing from nn1, press Ctrl+C on nn1 to stop its NameNode process
Stop the JournalNode on all nodes
/home/root1/app/hadoop/sbin/hadoop-daemon.sh stop journalnode
Start HDFS with a single command
sbin/start-dfs.sh
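As a rough sanity check (a sketch only; the exact daemon list depends on which roles run on each host), jps after start-dfs.sh should show the HA daemons:
jps
# expected on hadoop01 and hadoop02: NameNode, DataNode, JournalNode, DFSZKFailoverController
# expected on hadoop03: DataNode, JournalNode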
After HDFS has started, the following commands show the state of each NameNode
bin/hdfs haadmin -getServiceState nn1
bin/hdfs haadmin -getServiceState nn2
View HDFS in the web UI
http://ip:50070
Test HDFS
Create a file wd.txt in a local directory
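For example (the file content is arbitrary; the line below is only a placeholder):
echo "hello hdfs" > wd.txt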
Create a test directory in the HDFS filesystem
bin/hdfs dfs -mkdir /test
bin/hdfs dfs -ls /
Upload wd.txt to the test directory
bin/hdfs dfs -put wd.txt /test/
bin/hdfs dfs -ls /test
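To confirm the upload, the file can also be read back (a simple extra check, not part of the original steps):
bin/hdfs dfs -cat /test/wd.txt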