1. Environment Setup


    I. Linux base environment preparation

    OS: CentOS 6.5, three nodes (a condensed command sketch for steps 2-5 follows this list)
    1. Install the operating system
    
    2. Disable the firewall and SELinux
    
    3. Set the hostname on each node and update /etc/hosts
    
    4. Set up passwordless SSH (mutual trust) between the nodes
    
    5. Install JDK 1.7
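
    A minimal sketch of steps 2-5, assuming the nodes are named spark1, spark2 and spark3 with the IPs 192.168.1.135-137 that appear later in the Kafka section; adjust names, IPs and the JDK path to your environment. Run on every node unless noted.
    
    ##firewall and SELinux
    service iptables stop && chkconfig iptables off
    setenforce 0
    sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
    
    ##hostname (use spark2/spark3 on the other nodes) and hosts file
    hostname spark1        ##also set HOSTNAME=spark1 in /etc/sysconfig/network
    echo "192.168.1.135 spark1" >> /etc/hosts
    echo "192.168.1.136 spark2" >> /etc/hosts
    echo "192.168.1.137 spark3" >> /etc/hosts
    
    ##passwordless SSH: generate a key and copy it to all nodes
    ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
    for h in spark1 spark2 spark3; do ssh-copy-id root@$h; done
    
    ##JDK 1.7 (the tarball normally unpacks to jdk1.7.0_80; rename it to match the JAVA_HOME used below)
    tar zxf /usr/local/src/jdk-7u80-linux-x64.tar.gz -C /usr/local/
    mv /usr/local/jdk1.7.0_80 /usr/local/jdk1.7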


    II. Hadoop installation

    1. Extract and install
    
    Version: hadoop 2.4.1
    
    [root@s1 src]# pwd
    /usr/local/src
    
    [root@s1 src]# ls
    apache-hive-0.13.1-bin.tar.gz  hadoop-2.4.1.tar.gz        kafka_2.9.2-0.8.1.tgz  slf4j-1.7.6.zip                zookeeper-3.4.5.tar.gz
    CentOS6-Base-163.repo          jdk-7u80-linux-x64.tar.gz  scala-2.11.4.tgz       spark-1.3.0-bin-hadoop2.4.tgz
    
    [root@s1 src]# tar zxf hadoop-2.4.1.tar.gz -C /usr/local/
    
    [root@s1 local]# mv hadoop-2.4.1/ hadoop
    
    
    ##add environment variables
    vim /etc/profile
    export JAVA_HOME=/usr/local/jdk1.7
    export HADOOP_HOME=/usr/local/hadoop
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    
    [root@s1 local]# source /etc/profile
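
    A quick check that the new variables are picked up (exact version strings depend on the builds installed):
    
    java -version          ##expect something like java version "1.7.0_80"
    hadoop version         ##expect Hadoop 2.4.1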


    2. Edit the configuration files (all under /usr/local/hadoop/etc/hadoop/)

    ##core-site.xml
    <property>
      <name>fs.default.name</name>
      <value>hdfs://spark1:9000</value>
    </property>
    
    
    ##hdfs-site.xml
    <property>
      <name>dfs.name.dir</name>
      <value>/usr/local/data/namenode</value>
    </property>
    <property>
      <name>dfs.data.dir</name>
      <value>/usr/local/data/datanode</value>
    </property>
    <property>
      <name>dfs.tmp.dir</name>
      <value>/usr/local/data/tmp</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>3</value>
    </property>
    
    [root@spark1 ~]# mkdir /usr/local/data
    [root@spark2 ~]# mkdir /usr/local/data
    [root@spark3 ~]# mkdir /usr/local/data
    
    
    ##mapred-site.xml
    <property>  
      <name>mapreduce.framework.name</name>  
      <value>yarn</value>
    </property>
    
    
    ##yarn-site.xml
    <property>
      <name>yarn.resourcemanager.hostname</name>
      <value>spark1</value>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
    </property>
    
    
    ##slaves
    spark1
    spark2
    spark3

    It is best to also set JAVA_HOME explicitly in hadoop-env.sh, mapred-env.sh and yarn-env.sh; for example:
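
    ##in /usr/local/hadoop/etc/hadoop/hadoop-env.sh (and the analogous line in mapred-env.sh / yarn-env.sh), matching the profile above
    export JAVA_HOME=/usr/local/jdk1.7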


    3. Distribute Hadoop

    [root@spark1 local]# scp -r hadoop spark2:/usr/local/
    
    [root@spark1 local]# scp -r hadoop spark3:/usr/local/
    
    
    
    ##distribute /etc/profile
    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
    
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/
    
    ##then run source /etc/profile on spark2 and spark3


    4. Start the services
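
    On a brand-new cluster, format the NameNode once before starting HDFS for the first time (this initializes /usr/local/data/namenode on spark1; do not rerun it later, as it wipes HDFS metadata):
    
    ##one-time only, on spark1
    hdfs namenode -format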

    ##start HDFS
    [root@spark1 ~]# start-dfs.sh 
    
    
    ##expected daemons on each node (check with jps):
    spark1: NameNode, DataNode, SecondaryNameNode
    spark2: DataNode
    spark3: DataNode
    
    
    ##HDFS web UI: open http://spark1:50070 in a browser
    
    
    
    ##start YARN
    [root@spark1 hadoop]# start-yarn.sh
    
    ##expected daemons:
    spark1: ResourceManager, NodeManager
    spark2: NodeManager
    spark3: NodeManager
    
    ##YARN web UI: open http://spark1:8088 in a browser
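
    Optionally, a small smoke test of HDFS and MapReduce-on-YARN (a sketch; the file name is arbitrary, and the examples jar is normally found under share/hadoop/mapreduce/ in the Hadoop 2.4.1 distribution):
    
    ##HDFS: write a file and read it back
    echo "hello hdfs" > /tmp/test.txt
    hdfs dfs -mkdir -p /test
    hdfs dfs -put /tmp/test.txt /test/
    hdfs dfs -cat /test/test.txt
    
    ##YARN: run the bundled pi estimator
    hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.1.jar pi 2 10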


    III. Hive setup

    1. Install

    [root@spark1 src]# tar zxf apache-hive-0.13.1-bin.tar.gz -C /usr/local/
    
    [root@spark1 local]# mv apache-hive-0.13.1-bin/ hive
    
    
    ##update environment variables
    [root@spark1 local]# vim /etc/profile
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin


    2. Install MySQL

    ##MySQL stores Hive's metadata (the metastore)
    
    ##install MySQL on spark1
    [root@spark1 ~]# yum install -y mysql-server
    
    [root@spark1 ~]# service mysqld start
    
    [root@spark1 ~]# chkconfig mysqld on
    
    
    ##install the MySQL JDBC connector
    yum install -y mysql-connector-java
    
    ##copy the connector jar into Hive's lib directory
    [root@spark1 ~]# cp /usr/share/java/mysql-connector-java-5.1.17.jar /usr/local/hive/lib/
    
    
    ##create the Hive metastore database in MySQL and grant privileges to the hive user
    [root@spark1 ~]# mysql 
    Welcome to the MySQL monitor.  Commands end with ; or \g.
    Your MySQL connection id is 2
    Server version: 5.1.73 Source distribution
    
    Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
    
    Oracle is a registered trademark of Oracle Corporation and/or its
    affiliates. Other names may be trademarks of their respective
    owners.
    
    Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
    
    mysql> create database if not exists hive_metadata;
    Query OK, 1 row affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'%' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'localhost' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'spark1' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> flush privileges;
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> use hive_metadata;
    Database changed
    mysql> show tables;
    Empty set (0.00 sec)


    3. Configure Hive

    ##hive-site.xml (in /usr/local/hive/conf/)
    <configuration>
    
    <property>
      <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://spark1:3306/hive_metadata?createDatabaseIfNotExist=true</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>hive</value>
    </property>
    <property>
      <name>hive.metastore.warehouse.dir</name>
      <value>/user/hive/warehouse</value>
    </property>
    
    </configuration>
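
    If conf/hive-site.xml does not exist yet, it can be written from scratch with just the properties above, or created from the shipped template and then edited:
    
    cd /usr/local/hive/conf/
    cp hive-default.xml.template hive-site.xml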
    
    
    
    ##create hive-env.sh from its template
    mv hive-env.sh.template hive-env.sh
    
    
    
    ##set the environment in hive-config.sh
    vi /usr/local/hive/bin/hive-config.sh
    export JAVA_HOME=/usr/java/latest
    export HIVE_HOME=/usr/local/hive
    export HADOOP_HOME=/usr/local/hadoop
    
    
    
    ##verify
    [root@spark1 bin]# hive
    
    Logging initialized using configuration in jar:file:/usr/local/hive/lib/hive-common-0.13.1.jar!/hive-log4j.properties
    hive> create table t1(id int);
    OK
    Time taken: 0.645 seconds
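
    After this first DDL statement the metastore schema should exist in MySQL, and the table directory should appear under the warehouse path; a quick check (metastore table names such as DBS/TBLS are generated by Hive):
    
    ##in MySQL on spark1
    mysql> use hive_metadata;
    mysql> show tables;        ##should now list tables such as DBS, TBLS, COLUMNS_V2, ...
    
    ##in HDFS
    hdfs dfs -ls /user/hive/warehouse        ##should contain a t1 directory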


    IV. ZooKeeper setup

    1. Install and configure

    [root@spark1 src]# tar zxf zookeeper-3.4.5.tar.gz -C /usr/local/
    
    [root@spark1 local]# mv zookeeper-3.4.5/ zk
    
    
    #configure environment variables
    vim /etc/profile
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
    
    
    #create the zk data directory
    [root@spark1 conf]# mkdir /usr/local/zk/data
    
    
    #edit the configuration file
    [root@spark1 conf]# mv zoo_sample.cfg zoo.cfg
    [root@spark1 conf]# vim zoo.cfg 
    dataDir=/usr/local/zk/data
    server.0=spark1:2888:3888       
    server.1=spark2:2888:3888
    server.2=spark3:2888:3888
    
    
    
    ##set the zk node id (myid)
    [root@spark1 conf]# cd /usr/local/zk/data/
    
    [root@spark1 data]# vim myid
    0


    2. Distribute

    [root@spark1 local]# scp -r /usr/local/zk spark2:/usr/local/
    
    [root@spark1 local]# scp -r /usr/local/zk spark3:/usr/local/
    
    
    After distribution, the only per-node difference is the id in /usr/local/zk/data/myid: set it to 1 on spark2 and 2 on spark3 (see the one-liner sketch below).
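
    For example, from spark1 (assuming the passwordless SSH set up earlier):
    
    ssh spark2 "echo 1 > /usr/local/zk/data/myid"
    ssh spark3 "echo 2 > /usr/local/zk/data/myid"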

    ##distribute /etc/profile

    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
      
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/

    source /etc/profile        ##on spark2 and spark3


    3. Start

    1. On all three nodes, run: zkServer.sh start
    
    2. On each node, check the status: zkServer.sh status (a quick verification sketch follows)
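
    A minimal verification once all three servers are started (the second command needs nc/netcat installed; exact output wording may vary):
    
    zkServer.sh status            ##one node should report "Mode: leader", the other two "Mode: follower"
    echo ruok | nc spark1 2181    ##a healthy server answers "imok"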


    V. Kafka setup

    1. Install Scala
    
    ##install scala
    [root@spark1 src]# tar -zxvf scala-2.11.4.tgz -C /usr/local/
    
    [root@spark1 local]# mv scala-2.11.4 scala
    
    
    
    ##update environment variables
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export SCALA_HOME=/usr/local/scala
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
    
    
    
    ##distribute scala and /etc/profile
    scp -r scala spark2:/usr/local/
    scp -r scala spark3:/usr/local/
    
    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/
    
    On spark2 and spark3, run: source /etc/profile


    2. Set up Kafka

    [root@spark1 src]# tar zxf kafka_2.9.2-0.8.1.tgz -C /usr/local/
    
    [root@spark1 local]# mv kafka_2.9.2-0.8.1 kafka
    
    
    ##configure kafka
    vi /usr/local/kafka/config/server.properties
    broker.id=0            #must be unique per broker and increase sequentially (0, 1, 2, ...)
    
    zookeeper.connect=192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181            #point kafka at the zookeeper ensemble
    
    
    
    ##install slf4j
    unzip slf4j-1.7.6.zip
    
    [root@spark1 src]# cp slf4j-1.7.6/slf4j-nop-1.7.6.jar /usr/local/kafka/libs/
    
    
    
    
    ##distribute kafka
    Copy kafka to spark2 and spark3 with scp (a sketch follows);
    the only per-node difference is broker.id in server.properties, which must be set to 1 and 2 respectively.
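
    A minimal sketch of the distribution; the sed edits are just a convenience and assume the passwordless SSH set up earlier, editing server.properties by hand works equally well:
    
    scp -r /usr/local/kafka spark2:/usr/local/
    scp -r /usr/local/kafka spark3:/usr/local/
    ssh spark2 "sed -i 's/^broker.id=.*/broker.id=1/' /usr/local/kafka/config/server.properties"
    ssh spark3 "sed -i 's/^broker.id=.*/broker.id=2/' /usr/local/kafka/config/server.properties"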


    3. Start Kafka

    ###Fix the kafka "Unrecognized VM option 'UseCompressedOops'" startup error: remove -XX:+UseCompressedOops from KAFKA_JVM_PERFORMANCE_OPTS, so the block reads
    vi bin/kafka-run-class.sh
    if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
      KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true"
    fi
    
    
    ##distribute kafka-run-class.sh
    [root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark2:/usr/local/kafka/bin/
    
    [root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark3:/usr/local/kafka/bin/
    
    
    
    ##start the broker; run on all three nodes
    cd /usr/local/kafka/        #required: start from the kafka directory
    
    [root@spark3 kafka]# nohup bin/kafka-server-start.sh config/server.properties &
    
    jps        ##a Kafka process should now be listed
    
    
    
    
    ##test the kafka cluster from spark1, using two terminal windows
    //window 1: create a topic and produce messages
    cd /usr/local/kafka/        #required
    
    [root@spark1 kafka]# bin/kafka-topics.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --replication-factor 1 --partitions 1 --create
    Created topic "TestTopic".
    
    [root@spark1 kafka]# bin/kafka-console-producer.sh --broker-list 192.168.1.135:9092,192.168.1.136:9092,192.168.1.137:9092 --topic TestTopic
    hello kafka        #message sent here
    
    
    //window 2: consume
    cd /usr/local/kafka/        #required
    
    [root@spark1 kafka]# bin/kafka-console-consumer.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --from-beginning
    hello kafka        #received here
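
    Optionally, describe the topic to confirm its partition leader and replicas (kafka-topics.sh in 0.8.1 also supports --describe):
    
    bin/kafka-topics.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --describe --topic TestTopic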


    VI. Spark setup

    1. Install

    [root@spark1 src]# tar zxf spark-1.3.0-bin-hadoop2.4.tgz -C /usr/local/
    
    [root@spark1 local]# mv spark-1.3.0-bin-hadoop2.4 spark
    
    
    ##set spark environment variables
    [root@spark1 local]# vim /etc/profile
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export SCALA_HOME=/usr/local/scala
    export SPARK_HOME=/usr/local/spark
    export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin:$SPARK_HOME/bin


    2. Configure

    ##edit spark-env.sh
    [root@spark1 ~]# cd /usr/local/spark/conf/
    
    [root@spark1 conf]# cp spark-env.sh.template spark-env.sh
    
    vim  spark-env.sh
    export JAVA_HOME=/usr/java/latest
    export SCALA_HOME=/usr/local/scala
    export SPARK_MASTER_IP=192.168.1.135
    export SPARK_WORKER_MEMORY=1g
    export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
    
    
    
    ##slaves
    [root@spark1 conf]# mv slaves.template slaves
    
    vim slaves
    #spark1        #commented out: spark1 will not run a Worker here
    spark2
    spark3


    3. Distribute

    ##distribute spark
    Copy spark and /etc/profile to spark2 and spark3 with scp:
    
    scp -r spark spark2:/usr/local/
    scp -r spark spark3:/usr/local/
    
    scp -r /etc/profile spark2:/etc/
    scp -r /etc/profile spark3:/etc/
    source /etc/profile        ##on spark2 and spark3


    4. Start

    ##on spark1 (standalone master and workers)
    [root@spark1 local]# cd /usr/local/spark/sbin/
    
    [root@spark1 sbin]# ./start-all.sh
    
    
    
    ##jps
    [root@spark1 sbin]# jps
    4615 Jps
    3718 QuorumPeerMain
    1664 SecondaryNameNode
    4052 Kafka
    4450 Master
    1397 NameNode
    1879 ResourceManager
    1976 NodeManager
    1514 DataNode
    
    
    [root@spark2 kafka]# jps
    1374 NodeManager
    2249 Jps
    1988 Kafka
    2130 Worker
    1263 DataNode
    1774 QuorumPeerMain
    
    
    [root@spark3 kafka]# jps
    1265 DataNode
    2014 Kafka
    1377 NodeManager
    2155 Worker
    1791 QuorumPeerMain
    2274 Jps
    
    
    ##Spark master web UI: open http://192.168.1.135:8080 in a browser
    
    
    ##launch the spark shell
    [root@spark1 sbin]# spark-shell
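
    By default spark-shell starts in local mode; pass --master spark://192.168.1.135:7077 to attach it to the standalone cluster started above. Once the prompt appears, a quick sanity check (sc is the SparkContext the shell creates automatically; the HDFS path below is the smoke-test file from the Hadoop section, if it was created):
    
    scala> sc.parallelize(1 to 1000).sum()                            // expect 500500.0
    scala> sc.textFile("hdfs://spark1:9000/test/test.txt").count()    // line count of the earlier smoke-test file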