前情提示:实践环境为 KVM 上的 CentOS 6.9!(注:文中出现的 systemctl 命令属于 CentOS 7,在 CentOS 6 上应使用 service/chkconfig——请以实际系统版本为准)
1.Hadoop 本地模式配置
1.1 安装JDK
[root@chenjiaxin-1 ~]# tar zxvf jdk-8u102-linux-x64.tar.gz -C /usr/local/
1.2 设置JDK环境变量
[root@chenjiaxin-1 ~]# tail -5 /etc/profile
#JDK Env
export JAVA_HOME=/usr/local/jdk1.8.0_102
export PATH=$JAVA_HOME/bin:$PATH
1.3 验证JDK
[root@chenjiaxin-1 ~]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
[root@chenjiaxin-1 ~]#
1.4 安装Hadoop
[root@chenjiaxin-1 ~]# tar zxvf hadoop-2.7.3.tar.gz -C /usr/local/
1.5 验证Hadoop本地模式是否安装成功
[root@chenjiaxin-1 hadoop-2.7.3]# bin/hadoop version
Hadoop 2.7.3
Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r baa91f7c6bc9cb92be5982de4719c1c8af91ccff
Compiled by root on 2016-08-18T01:41Z
Compiled with protoc 2.5.0
From source with checksum 2e4ce5f957ea4db193bce3734ff29ff4
This command was run using /usr/local/hadoop-2.7.3/share/hadoop/common/hadoop-common-2.7.3.jar
[root@chenjiaxin-1 hadoop-2.7.3]#
1.6 测试本地模式
例子是搜索input文件夹内所有文件,找到以dfs开始后面跟着字母a-z的单词,并输出在output目录里。
[root@chenjiaxin-1 hadoop-2.7.3]# mkdir input
[root@chenjiaxin-1 hadoop-2.7.3]# cp etc/hadoop/*.xml input
[root@chenjiaxin-1 hadoop-2.7.3]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[root@chenjiaxin-1 hadoop-2.7.3]# cat output/*
1 dfsadmin
[root@chenjiaxin-1 hadoop-2.7.3]# ls output/*
output/part-r-00000 output/_SUCCESS
[root@chenjiaxin-1 hadoop-2.7.3]# ls -a output/
. .. part-r-00000 .part-r-00000.crc _SUCCESS ._SUCCESS.crc
[root@chenjiaxin-1 hadoop-2.7.3]#
本地模式验证完毕END
2.单节点(伪分布式)部署与配置
2.1 修改hadoop-env.sh
在基于本地模式的前提下, 编辑下面几个文件
[root@chenjiaxin-1 hadoop-2.7.3]# cd /usr/local/hadoop-2.7.3/
[root@chenjiaxin-1 hadoop-2.7.3]# cd etc/hadoop/
[root@chenjiaxin-1 hadoop]# vim hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_102
2.2 修改core-site.xml
[root@chenjiaxin-1 hadoop]# vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value> <!-- 可以写具体IP或主机名 -->
</property>
</configuration>
2.3 修改hdfs-site.xml
[root@chenjiaxin-1 ~]# mkdir /hadoopdata/hdfs/namenode -p
[root@chenjiaxin-1 ~]# mkdir /hadoopdata/hdfs/datanode -p
[root@chenjiaxin-1 ~]# chmod 777 -R /hadoopdata/hdfs/datanode
[root@chenjiaxin-1 ~]# cd -
[root@chenjiaxin-1 hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>file:///hadoopdata/hdfs/namenode</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>file:///hadoopdata/hdfs/datanode</value>
</property>
</configuration>
2.4 修改mapred-site.xml
[root@chenjiaxin-1 hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@chenjiaxin-1 hadoop]# vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
2.5 修改yarn-site.xml
[root@chenjiaxin-1 hadoop]# vim yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
2.6 设置无密码登陆
[root@chenjiaxin-1 ~]# ssh 10.122.18.3
[root@chenjiaxin-1 ~]# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[root@chenjiaxin-1 ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
2.7 执行MapReduce Job
2.7.1 格式化文件系统
[root@chenjiaxin-1 hadoop-2.7.3]# bin/hdfs namenode -format
2.7.2 启动NameNode 和DataNode 守护程序
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-dfs.sh
Starting namenodes on [chenjiaxin-1]
The authenticity of host 'chenjiaxin-1 (fe80::f816:3eff:fed9:3684%eth0)' can't be established.
ECDSA key fingerprint is a5:69:7e:64:86:13:44:b9:3f:4b:76:aa:31:70:98:67.
Are you sure you want to continue connecting (yes/no)? yes
chenjiaxin-1: Warning: Permanently added 'chenjiaxin-1,fe80::f816:3eff:fed9:3684%eth0' (ECDSA) to the list of known hosts.
chenjiaxin-1: starting namenode, logging to /usr/local/hadoop-2.7.3/logs/hadoop-root-namenode-chenjiaxin-1.out
The authenticity of host 'localhost (::1)' can't be established.
ECDSA key fingerprint is a5:69:7e:64:86:13:44:b9:3f:4b:76:aa:31:70:98:67.
Are you sure you want to continue connecting (yes/no)? yes
localhost: Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
localhost: starting datanode, logging to /usr/local/hadoop-2.7.3/logs/hadoop-root-datanode-chenjiaxin-1.out
Starting secondary namenodes [0.0.0.0]
The authenticity of host '0.0.0.0 (0.0.0.0)' can't be established.
ECDSA key fingerprint is a5:69:7e:64:86:13:44:b9:3f:4b:76:aa:31:70:98:67.
Are you sure you want to continue connecting (yes/no)? yes
0.0.0.0: Warning: Permanently added '0.0.0.0' (ECDSA) to the list of known hosts.
0.0.0.0: starting secondarynamenode, logging to /usr/local/hadoop-2.7.3/logs/hadoop-root-secondarynamenode-chenjiaxin-1.out
[root@chenjiaxin-1 hadoop-2.7.3]#
[root@chenjiaxin-1 hadoop-2.7.3]# jps
7009 DataNode
7299 Jps
6884 NameNode
7175 SecondaryNameNode
2.7.3 访问NameNode
http://localhost:50070
2.8 启动 ResourceManager 和 NodeManager 守护程序
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop-2.7.3/logs/yarn-root-resourcemanager-chenjiaxin-1.out
localhost: starting nodemanager, logging to /usr/local/hadoop-2.7.3/logs/yarn-root-nodemanager-chenjiaxin-1.out
[root@chenjiaxin-1 hadoop-2.7.3]#
2.9 访问ResourceManager
http://IPaddr:8088
2.10 Hadoop进程管理
2.10.1 手动逐步启动
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-dfs.sh
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-yarn.sh
2.10.2 一次性启动服务
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-all.sh
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/stop-all.sh
3.HDFS 常用命令演练
4.Hadoop多节点部署与配置(待确认)
4.1 安装JDK
[root@chenjiaxin-1 ~]# tar zxvf jdk-8u102-linux-x64.tar.gz -C /usr/local/
4.2 设置JDK环境变量
[root@chenjiaxin-1 ~]# tail -5 /etc/profile
#JDK ENV
JAVA_HOME=/usr/local/jdk1.8.0_102
HADOOP_HOME=/usr/local/hadoop-2.7.3
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$PATH
4.3 验证JDK
[root@chenjiaxin-1 ~]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
[root@chenjiaxin-1 ~]#
4.4 安装Hadoop
[root@chenjiaxin-1 ~]# tar zxvf hadoop-2.7.3.tar.gz -C /usr/local/
4.5 验证Hadoop是否安装成功
[root@chenjiaxin-1 ~]# /usr/local/hadoop-2.7.3/bin/hadoop version
Hadoop 2.7.3
Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r baa91f7c6bc9cb92be5982de4719c1c8af91ccff
Compiled by root on 2016-08-18T01:41Z
Compiled with protoc 2.5.0
From source with checksum 2e4ce5f957ea4db193bce3734ff29ff4
This command was run using /usr/local/hadoop-2.7.3/share/hadoop/common/hadoop-common-2.7.3.jar
[root@chenjiaxin-1 ~]#
4.6 节点间无密码登录
[root@chenjiaxin-1 ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
6e:e7:f8:a0:f7:82:22:95:9f:e0:11:b8:cb:13:f6:b6 root@chenjiaxin-1.novalocal
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| . |
| . . |
| . o S |
| + = . |
| o * + o+ . |
| = = +oo= |
| +Eo...++ |
+-----------------+
[root@chenjiaxin-1 ~]# ssh-copy-id localhost
The authenticity of host 'localhost (::1)' can't be established.
ECDSA key fingerprint is a5:69:7e:64:86:13:44:b9:3f:4b:76:aa:31:70:98:67.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@localhost's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'localhost'"
and check to make sure that only the key(s) you wanted were added.
4.7 Hadoop配置
4.7.1 修改hadoop-env.sh
[root@chenjiaxin-1 hadoop]# pwd
/usr/local/hadoop-2.7.3/etc/hadoop
[root@chenjiaxin-1 hadoop]# vim hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_102
4.7.2 修改core-site.xml
[root@chenjiaxin-1 hadoop]# vim core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://chenjiaxin-1:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
</configuration>
[root@chenjiaxin-1 hadoop]# mkdir -p /data/hadoop/tmp
4.7.3 修改hdfs-site.xml
[root@chenjiaxin-1 hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>chenjiaxin-1:50070</value>
<!-- HDFS Web查看主机和端口 -->
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<!-- HDFS 关闭文件权限 -->
</property>
</configuration>
[root@chenjiaxin-1 hadoop]# mkdir -p /data/hadoop/hdfs/name
[root@chenjiaxin-1 hadoop]# mkdir -p /data/hadoop/hdfs/data
4.7.4修改 mapred-site.xml
[root@chenjiaxin-1 hadoop]# vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobtracker.http.address</name>
<value>chenjiaxin-1:50030</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>chenjiaxin-1:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>chenjiaxin-1:19888</value>
</property>
</configuration>
4.7.5 修改yarn-site.xml
<configuration>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>63000</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>63000</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>63</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>chenjiaxin-1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>chenjiaxin-1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>chenjiaxin-1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>chenjiaxin-1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>chenjiaxin-1:8088</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
</configuration>
4.7.6 修改slaves文件和masters文件(没有则创建)
[root@chenjiaxin-1 hadoop]# cat slaves
chenjiaxin-2.novalocal
chenjiaxin-3.novalocal
4.8 修改/etc/hosts文件
[root@chenjiaxin-1 hadoop]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.1xx.xx.xx chenjiaxin-1.novalocal chenjiaxin-1
10.1xx.xx.xx chenjiaxin-2.novalocal chenjiaxin-2
10.1xx.xx.xx chenjiaxin-3.novalocal chenjiaxin-3
4.9 拷贝配置文件与密钥至其他节点
4.9.1 拷贝/etc/hosts 文件
[root@chenjiaxin-1 hadoop]# scp /etc/hosts chenjiaxin-2:/etc/
[root@chenjiaxin-1 hadoop]# scp /etc/hosts chenjiaxin-3:/etc/
4.9.2 拷贝密钥文件
[root@chenjiaxin-1 ~]# scp -r /root/.ssh chenjiaxin-2:/root
[root@chenjiaxin-1 ~]# scp -r /root/.ssh chenjiaxin-3:/root
4.9.3 拷贝/etc/profile
[root@chenjiaxin-1 hadoop]# scp /etc/profile chenjiaxin-2:/etc/
[root@chenjiaxin-1 hadoop]# scp /etc/profile chenjiaxin-3:/etc/
4.9.4 拷贝 hadoop+JDK
[root@chenjiaxin-1 hadoop]# scp -r /usr/local/jdk1.8.0_102 chenjiaxin-2:/usr/local/
[root@chenjiaxin-1 hadoop]# scp -r /usr/local/jdk1.8.0_102 chenjiaxin-3:/usr/local/
[root@chenjiaxin-1 hadoop]# scp -r /usr/local/hadoop-2.7.3 chenjiaxin-2:/usr/local/
[root@chenjiaxin-1 hadoop]# scp -r /usr/local/hadoop-2.7.3 chenjiaxin-3:/usr/local/
4.10 其他节点操作
chenjiaxin-2
[root@chenjiaxin-2 ~]# source /etc/profile
[root@chenjiaxin-2 ~]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
[root@chenjiaxin-2 ~]# mkdir -p /data/hadoop/tmp
[root@chenjiaxin-2 ~]# mkdir -p /data/hadoop/hdfs/name
[root@chenjiaxin-2 ~]# mkdir -p /data/hadoop/hdfs/data
chenjiaxin-3
[root@chenjiaxin-3 ~]# source /etc/profile
[root@chenjiaxin-3 ~]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
[root@chenjiaxin-3 ~]# mkdir -p /data/hadoop/tmp
[root@chenjiaxin-3 ~]# mkdir -p /data/hadoop/hdfs/name
[root@chenjiaxin-3 ~]# mkdir -p /data/hadoop/hdfs/data
4.11 格式化 HDFS与启动服务
[root@chenjiaxin-1 hadoop-2.7.3]# bin/hdfs namenode -format
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-all.sh
[root@chenjiaxin-1 hadoop-2.7.3]# jps
注意事项:
但是此时发现slave节点没有启动datanode
上网查找解决方法,最后终于解决了,解决方法如下:
1. 先执行stop-all.sh暂停所有服务
2. 将所有Slave节点上的tmp(即 hdfs-site.xml 中指定的 dfs.data.dir 文件夹,DataNode存放数据块的位置)、 logs 文件夹删除 , 然后重新建立tmp , logs 文件夹
3. 将所有Slave节点上的/usr/hadoop/conf下的core-site.xml删除,将master节点的core-site.xml文件拷贝过来,到各个Slave节点,例如:scp /usr/hadoop/conf/core-site.xml hadoop@slave1:/usr/hadoop/conf/
4. 重新格式化: hadoop namenode -format
5. 启动:start-all.sh
hadoop中启动namenode等出现的一些问题
1、先运行stop-all.sh
2、格式化namenode,不过在这之前要先删除原目录,即core-site.xml下配置的<name>hadoop.tmp.dir</name>所指向的目录,删除后切记要重新建立配置的空目录,然后运行hadoop namenode -format
3、运行start-all.sh
以上顺序很重要!!!
5.Zookeeper集群实战
5.1 环境准备工作
关闭防火墙
禁用SELINUX
配置SSH无密码登录
[root@zk-1 ~]# systemctl disable firewalld
[root@zk-1 ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
[root@zk-1 ~]# sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
....
5.2 安装JDK
[root@zk-1 ~]# tar zxvf jdk-8u102-linux-x64.tar.gz -C /usr/local/
5.3 设置环境变量
[root@zk-1 jdk1.8.0_102]# vim /etc/profile
#JDK ENV
export JAVA_HOME=/usr/local/jdk1.8.0_102
export PATH=$JAVA_HOME/bin:$PATH
[root@zk-1 jdk1.8.0_102]# source /etc/profile
[root@zk-1 jdk1.8.0_102]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
5.4 安装zookeeper
[root@zk-1 ~]# wget http://mirrors.cnnic.cn/apache/zookeeper/zookeeper-3.4.8/zookeeper-3.4.8.tar.gz
[root@zk-1 ~]# tar zxvf zookeeper-3.4.8.tar.gz -C /usr/local/
[root@zk-1 ~]# cd /usr/local/zookeeper-3.4.8/conf/
5.5 配置zookeeper
配置说明:
目前myid的取值范围为1-255,节点的myid在单个集群内必须唯一 如果不指定myid,集群模式下会无法启动,只能以standalone模式运行
zk-1
注意tmp目录的路径
[root@zk-1 conf]# mkdir -p tmp/zookeeper
[root@zk-1 conf]# echo "1" > tmp/zookeeper/myid
注意zoo.cfg里面的datadir
在zoo.cfg 文件增加下面的节点信息
server.1=zk-1:2888:3888
server.2=zk-2:2888:3888
server.3=zk-3:2888:3888
zk-2
[root@zk-1 ~]# scp -r /usr/local/jdk1.8.0_102 /usr/local/zookeeper-3.4.8 root@zk-2:/usr/local/
[root@zk-1 ~]# scp /etc/profile root@zk-2:/etc
root@zk-2's password:
profile
[root@zk-2 conf]# mkdir -p tmp/zookeeper
[root@zk-2 conf]# echo "2" > tmp/zookeeper/myid
[root@zk-2 local]# source /etc/profile
[root@zk-2 local]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
zk-3
[root@zk-1 ~]# scp -r /usr/local/jdk1.8.0_102 /usr/local/zookeeper-3.4.8 root@zk-3:/usr/local/
[root@zk-1 ~]# scp /etc/profile root@zk-3:/etc
root@zk-3's password:
profile
[root@zk-3 conf]# mkdir -p tmp/zookeeper
[root@zk-3 conf]# echo "3" > tmp/zookeeper/myid
[root@zk-3 ~]# source /etc/profile
[root@zk-3 ~]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
[root@zk-3 ~]#
5.6 Zookeeper集群启动
[root@zk-1 ~]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@zk-1 ~]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Mode: follower
[root@zk-1 ~]#
[root@zk-2 local]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@zk-2 local]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Mode: leader
[root@zk-2 local]#
[root@zk-3 ~]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@zk-3 ~]# /usr/local/zookeeper-3.4.8/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper-3.4.8/bin/../conf/zoo.cfg
Mode: follower
5.7 Zookeeper集群测试
[root@zk-1 ~]# telnet zk-1 2181
Trying 10.122.18.41...
Connected to zk-1.
Escape character is '^]'.
quit
Connection closed by foreign host.
[root@zk-1 ~]#
[root@zk-1 ~]# cd /usr/local/zookeeper-3.4.8/src/c
[root@zk-1 c]# ./configure && make && make install
/usr/bin/mkdir -p '/usr/local/bin'
/bin/sh ./libtool --mode=install /usr/bin/install -c cli_st cli_mt load_gen '/usr/local/bin'
libtool: install: /usr/bin/install -c .libs/cli_st /usr/local/bin/cli_st
libtool: install: /usr/bin/install -c .libs/cli_mt /usr/local/bin/cli_mt
libtool: install: /usr/bin/install -c .libs/load_gen /usr/local/bin/load_gen
/usr/bin/mkdir -p '/usr/local/include/zookeeper'
/usr/bin/install -c -m 644 include/zookeeper.h include/zookeeper_version.h include/zookeeper_log.h include/proto.h include/recordio.h generated/zookeeper.jute.h '/usr/local/include/zookeeper'
make[1]: Leaving directory `/usr/local/zookeeper-3.4.8/src/c'
[root@zk-1 c]#
[root@zk-1 ~]# cli_mt zk-1:2181
****** 注意事项************
cli_mt: error while loading shared libraries: libzookeeper_mt.so.2: cannot open shared object file: No such file or directory
#################################
在/etc/ld.so.conf中加入/usr/local/lib这一行,保存之后,再运行:/sbin/ldconfig -v 更新一下配置即可
****************************************
Watcher SESSION_EVENT state = CONNECTED_STATE
Got a new session id: 0x15a22b86de10000
sdfldskf
help
create [+[e|s]] <path>
delete <path>
set <path> <data>
get <path>
ls <path>
ls2 <path>
sync <path>
exists <path>
wexists <path>
myid
verbose
addauth <id> <scheme>
quit
5.8 在线Leader选举过程
http://thesecretlivesofdata.com/raft/
6.Kafka集群实战
6.1 环境准备工作
6.1.1 停掉Firewalld禁用SELINUX
三个节点都需要执行
[root@zk-1 ~]# systemctl disable firewalld
[root@zk-1 ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
6.1.2 安装JDK及配置环境变量
[root@zk-1 ~]# tar zxvf jdk-8u102-linux-x64.tar.gz -C /usr/local/
[root@zk-1 jdk1.8.0_102]# vim /etc/profile
#JDK ENV
export JAVA_HOME=/usr/local/jdk1.8.0_102
export PATH=$JAVA_HOME/bin:$PATH
[root@zk-1 jdk1.8.0_102]# source /etc/profile
[root@zk-1 jdk1.8.0_102]# java -version
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
6.1.3 Zookeeper 环境准备好
如 上zookeeper 配置内容, (略)
6.2 安装Kafka
[root@zk-1 ~]# tar zxvf kafka_2.10-0.10.1.1.tgz -C /usr/local/
[root@zk-2 ~]# tar zxvf kafka_2.10-0.10.1.1.tgz -C /usr/local/
[root@zk-3 ~]# tar zxvf kafka_2.10-0.10.1.1.tgz -C /usr/local/
6.3 配置Kafka
Kafka-1
[root@zk-1 config]# grep -v "^#" server.properties |grep -v "^$"
broker.id=1
listeners=PLAINTEXT://10.122.18.41:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/usr/local/kafka_2.10-0.10.1.1/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=zk-1:2181,zk-2:2181,zk-3:2181
zookeeper.connection.timeout.ms=6000
[root@zk-1 config]# pwd
/usr/local/kafka_2.10-0.10.1.1/config
[root@zk-1 config]#
Kafka-2 Kafka-3 注意上述的四项配置即可,主要调整下面两项
Kafka-2:
broker.id=2
listeners=PLAINTEXT://当前主机名或IP地址:9092
Kafka-3:
broker.id=3
listeners=PLAINTEXT://当前主机名或IP地址:9092
6.4 启动kafka 服务
启动前确保zookeeper 服务是启动的
[root@zk-1 kafka_2.10-0.10.1.1]# pwd
/usr/local/kafka_2.10-0.10.1.1
[root@zk-1 kafka_2.10-0.10.1.1]# bin/kafka-server-start.sh config/server.properties &
[root@zk-2 kafka_2.10-0.10.1.1]# bin/kafka-server-start.sh config/server.properties &
[root@zk-3 kafka_2.10-0.10.1.1]# bin/kafka-server-start.sh config/server.properties &
6.5 Kafka测试与验证
6.5.1 创建主题
bin/kafka-topics.sh --create --zookeeper vh06.stu.cn:2181 --replication-factor 3 --partitions 1 --topic my-replicated-topic
6.5.2 查看主题信息
bin/kafka-topics.sh --describe --zookeeper vh06.stu.cn:2181 --topic my-replicated-topic
这个时候看那个节点是Leader 在leader上输入内容,在其他节点接受内容;
集群的话把当前的leader停掉,重新查看那个节点是leader ,在不是leader节点上还能收到同样的内容
6.5.3 往主题输入内容
bin/kafka-console-producer.sh --broker-list vh06.stu.cn:9092 --topic my-replicated-topic
输入message 信息
1234567890
1234567890
6.5.4 consumer 接收主题信息
再开启一个ssh 窗口接收主题信息:
bin/kafka-console-consumer.sh --bootstrap-server vh06.stu.cn:9092 --from-beginning --topic my-replicated-topic
测试完成~~~~
详细信息见官网:http://fireinwind.iteye.com/blog/2083333
6.5.5 查看主题
这个是单节点的kafka 测试
[root@zk-1 kafka_2.10-0.10.1.1]# bin/kafka-topics.sh --list --zookeeper zk-1:2181
test
[root@zk-1 kafka_2.10-0.10.1.1]# bin/kafka-topics.sh --list --zookeeper zk-2:2181
test
[root@zk-1 kafka_2.10-0.10.1.1]# bin/kafka-topics.sh --list --zookeeper zk-3:2181
test
[root@zk-1 kafka_2.10-0.10.1.1]#
8.Spark集群
8.1 设置环境变量
[root@chenjiaxin-1 local]# tail -5 /etc/profile
#JDK ENV
JAVA_HOME=/usr/local/jdk1.8.0_102
HADOOP_HOME=/usr/local/hadoop-2.7.3
SPARK_HOME=/usr/local/spark-2.1.0-bin-hadoop2.7
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
[root@chenjiaxin-1 local]# tar -zxvf spark-2.1.0-bin-hadoop2.7.tgz -C /usr/local/
8.2 编辑slaves内容
[root@chenjiaxin-1 conf]# cp slaves.template slaves
chenjiaxin-2.novalocal
chenjiaxin-3.novalocal
8.3 编辑 spark-env.sh
[root@chenjiaxin-1 conf]# cp spark-env.sh.template spark-env.sh
[root@chenjiaxin-1 conf]# vim spark-env.sh
export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop-2.7.3/bin/hadoop classpath)
export HADOOP_CONF_DIR=/usr/local/hadoop-2.7.3/etc/hadoop
export SPARK_MASTER_IP=10.122.18.69
8.4 拷贝spark 配置到slave 节点
[root@chenjiaxin-1 conf]# scp /usr/local/
[root@chenjiaxin-1 local]# scp -r spark-2.1.0-bin-hadoop2.7/ chenjiaxin-2:/usr/local/
[root@chenjiaxin-1 local]# scp -r spark-2.1.0-bin-hadoop2.7/ chenjiaxin-3:/usr/local/
8.5 启动hadoop集群
[root@chenjiaxin-1 hadoop-2.7.3]# sbin/start-all.sh
8.6 再启动spark集群
[root@chenjiaxin-1 spark-2.1.0-bin-hadoop2.7]# sbin/start-master.sh
[root@chenjiaxin-1 spark-2.1.0-bin-hadoop2.7]# sbin/start-slaves.sh
注意:当slaves 节点上 JAVA_HOME is not set
在spark-env.sh中设置一下$JAVA_HOME
8.7访问验证
http://IP Addr:8080 或 http://IP Addr:7077
9.Hadoop数据分析实战案例
9.1 实例场景说明
利用Hadoop平台 + Java等程序进行分析提取以下数据:
1)统计首字母分别为a~z的单词数目,以及首字母为a~z的单词平均长度;
2)统计尾字母分别为a~z的单词数目,以及尾字母为a~z的单词的平均长度;
3)按照单词首字母和尾字母的组合统计单词数目,以及各个组合里面单词平均长度;(举个例子,单词为apple,那么首字母为a,尾字母为e, apple这个单词就要归属到首字母为a尾字母为e的那一组。理论上组合有26*26种,但有的组合可能并没有单词)
9.2 Hadoop 多节点集群配置如上(略)
9.3 运行方式
bin/hadoop jar ./map2.jar com.g4.test.WordCountThree /input/hadoop_input.txt /output/result03
bin/hadoop jar ./map2.jar com.g4.test.WordCountTwo /input/hadoop_input.txt /output/result02
bin/hadoop jar ./map2.jar com.g4.test.WordCountOne /input/hadoop_input.txt /output/result01
9.4 Java 代码参考(就不提供啦)
Hadoop数据分析实战案例--自己打的war包!
17/10/19 18:06:53 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [vh05.stu.cn]
vh05.stu.cn: starting namenode, logging to /usr/local/hadoop/logs/hadoop-root-namenode-vh05.stu.cn.out
vh07.stu.cn: starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-vh07.stu.cn.out
vh06.stu.cn: starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-vh06.stu.cn.out
17/02/24 18:07:05 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable