Big Data Platform Setup: Hadoop-3.1.3 + Hive-3.1.2 + HBase-2.2.3 + Zookeeper-3.5.7 + Kafka_2.11-2.4.0 + Spark-2.4.5


    1. Framework Selection

    hadoop-3.1.3

    hive-3.1.2

    zookeeper-3.5.7

    hbase-2.2.3

    kafka_2.11-2.4.0

    spark-2.4.5-bin-hadoop2.7

    2. Pre-installation Preparation

    1. Disable the firewall (a shell sketch of steps 1, 4 and 5 follows this list)

    2. Install the JDK

    3. Install Scala

    4. Configure passwordless SSH

    5. Configure IP-to-hostname mappings

    6. Install MySQL
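
    A minimal sketch of steps 1, 4 and 5; it assumes CentOS 7-style hosts (firewalld) and the three hostnames used later, and the IP addresses are placeholders that must be replaced with the real ones.

    # 1. Disable the firewall (assumes firewalld)
    systemctl stop firewalld
    systemctl disable firewalld

    # 4. Passwordless SSH from this node to all three nodes
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
    for host in hadoop101 hadoop102 hadoop103; do ssh-copy-id $host; done

    # 5. IP and hostname mapping (placeholder IPs)
    echo "192.168.1.101 hadoop101" >> /etc/hosts
    echo "192.168.1.102 hadoop102" >> /etc/hosts
    echo "192.168.1.103 hadoop103" >> /etc/hosts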

    3. Installation

    3.1 Hadoop Installation

    1.hadoop-env.sh
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    
    2.hdfs-site.xml
     <property>
            <name>dfs.replication</name>
            <value>2</value>
      </property>
    
      <!-- Secondary NameNode host -->
      <property>
          <name>dfs.namenode.secondary.http-address</name>
          <value>hadoop102:50090</value>
      </property>
    
      <property>
           <name>dfs.namenode.name.dir</name>
           <value>/opt/module/hadoop-3.1.3/data/dfs/nn</value>
      </property>
      <property>
           <name>dfs.datanode.data.dir</name>
           <value>/opt/module/hadoop-3.1.3/data/dfs/dn</value>
      </property>
      <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
      </property>
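
    Once the package is unpacked and this file is edited, an effective value can be double-checked with hdfs getconf; a quick example:

    hdfs getconf -confKey dfs.replication
    # expected output: 2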
    
    3.yarn-env.sh
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    
    4.yarn-site.xml
    <!-- Shuffle service used by reducers to fetch map output -->
      <property>
    	<name>yarn.nodemanager.aux-services</name>
    	<value>mapreduce_shuffle</value>
      </property>
    
      <!-- ResourceManager host -->
      <property>
    	<name>yarn.resourcemanager.hostname</name>
    	<value>hadoop103</value>
      </property>
    
      <!-- Enable log aggregation -->
      <property>
     	<name>yarn.log-aggregation-enable</name>
    	<value>true</value>
      </property>
    
      <!-- Retain aggregated logs for 3 days -->
      <property>
    	<name>yarn.log-aggregation.retain-seconds</name>
    	<value>259200</value>
      </property>
      
      <property>
    	<name>yarn.nodemanager.vmem-check-enabled</name>
    	<value>false</value>
      </property>
      <property>
    	<name>yarn.nodemanager.vmem-pmem-ratio</name>
    	<value>5</value>
      </property>
    
    5.mapred-env.sh
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    
    6.mapred-site.xml
    <!-- Run MapReduce on YARN -->
      <property>
    	<name>mapreduce.framework.name</name>
    	<value>yarn</value>
      </property>
      
      <!-- JobHistory server -->
      <property>
    	<name>mapreduce.jobhistory.address</name>
    	<value>hadoop103:10020</value>
      </property>
      <property>
    	<name>mapreduce.jobhistory.webapp.address</name>
    	<value>hadoop103:19888</value>
      </property>
    
      <!-- Hadoop 3.x requires the MapReduce runtime environment to be set explicitly -->
      <property>
    	<name>yarn.app.mapreduce.am.env</name>
    	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
      </property>
      <property>
    	<name>mapreduce.map.env</name>
    	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
      </property>
      <property>
    	<name>mapreduce.reduce.env</name>
    	<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
      </property>
    
      <property>
    	<name>mapreduce.map.memory.mb</name>
    	<value>1024</value>
      </property>
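
    A sketch of starting the JobHistory server and smoke-testing the YARN/MapReduce settings once HDFS and YARN are running (see the start-up sketch after step 9 below):

    # on hadoop103, the configured JobHistory host
    mapred --daemon start historyserver

    # run the bundled pi example from any node
    hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.3.jar pi 2 10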
    
    7.core-site.xml
    <configuration>
      <!-- NameNode address -->
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop101:9000</value>
      </property>
    
      <!-- Directory for files generated at Hadoop runtime -->
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.1.3/data/tmp</value>
      </property>
    </configuration>
    
    8.workers

    In Hadoop 2 this file is called slaves; in Hadoop 3 it is workers.
    It lists the nodes that run DataNode and NodeManager:
    /opt/module/hadoop-3.1.3/etc/hadoop/workers

    hadoop101
    hadoop102
    hadoop103
    
    9. Configure environment variables in /etc/profile
    #Java
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    export PATH=$PATH:$JAVA_HOME/bin
    
    #Scala
    export SCALA_HOME=/opt/module/scala-2.11.12
    export PATH=$PATH:$SCALA_HOME/bin
    
    #Hadoop
    export HADOOP_HOME=/opt/module/hadoop-3.1.3
    export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
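
    With the configuration and environment variables in place on all three nodes (the hadoop-3.1.3 directory has to be distributed first, e.g. with scp or rsync, which the original steps do not show), a minimal first-start sketch:

    # format the NameNode once, on hadoop101 only
    hdfs namenode -format

    # start HDFS on hadoop101 and YARN on hadoop103 (the ResourceManager host)
    start-dfs.sh     # run on hadoop101
    start-yarn.sh    # run on hadoop103

    # verify the expected daemons on each node
    jps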
    

    3.2 Hive Installation

    1.hive-env.sh
    HADOOP_HOME=/opt/module/hadoop-3.1.3
    
    export HIVE_CONF_DIR=/opt/module/hive-3.1.2/conf
    
    export HIVE_AUX_JARS_PATH=/opt/module/hive-3.1.2/auxlib
    
    2.hive-site.xml
            <property>
    	  <name>javax.jdo.option.ConnectionURL</name>
    	  <value>jdbc:mysql://hadoop101:3306/metastore?createDatabaseIfNotExist=true</value>
    	</property>
    
    	<property>
    	  <name>javax.jdo.option.ConnectionDriverName</name>
    	  <value>com.mysql.jdbc.Driver</value>
    	</property>
    
    	<property>
    	  <name>javax.jdo.option.ConnectionUserName</name>
    	  <value>root</value>
    	</property>
    
    	<property>
    	  <name>javax.jdo.option.ConnectionPassword</name>
    	  <value>123456</value>
    	</property>
    
    	<property>
    	  <name>hive.cli.print.header</name>
    	  <value>true</value>
    	</property>
    
    	<property>
    	  <name>hive.cli.print.current.db</name>
    	  <value>true</value>
    	</property>
    
    	<property>
    	    <name>hive.metastore.uris</name>
    	    <value>thrift://hadoop101:9083</value>
    	</property>
    
    	<property>
    		<name>hive.server2.webui.host</name>
    		<value>hadoop101</value>
    	</property>
    
    	<property>
    		<name>hive.server2.webui.port</name>
    		<value>10002</value>
    	</property>
    
    	<!-- Hive 3.x enables ACID by default; Spark cannot read ACID Hive tables, so disable ACID -->
            <property>
                    <name>hive.strict.managed.tables</name>
                    <value>false</value>
            </property>
            <property>
                    <name>hive.create.as.insert.only</name>
                    <value>false</value>
            </property>
            <property>
                    <name>metastore.create.as.acid</name>
                    <value>false</value>
            </property>
    
    	<!-- Disable schema version verification -->
            <property>
                    <name>hive.metastore.schema.verification</name>
                    <value>false</value>
            </property>
    
    3. Create the HIVE_AUX_JARS_PATH directory
    mkdir -p /opt/module/hive-3.1.2/auxlib
    
    4. Copy mysql-connector-java-5.1.27-bin.jar into /opt/module/hive-3.1.2/lib
    cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar  /opt/module/hive-3.1.2/lib
    
    5. Configure environment variables
    #HIVE_HOME
    export HIVE_HOME=/opt/module/hive-3.1.2
    export PATH=$PATH:$HIVE_HOME/bin
    
    6. On first run, initialize the metastore schema
    schematool -dbType mysql -initSchema
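
    Because hive.metastore.uris is set above, the metastore service (and HiveServer2, if the web UI on port 10002 is wanted) must be started before using the CLI. A minimal sketch, assuming Hadoop is already running; the log file paths are arbitrary:

    # on hadoop101
    nohup hive --service metastore   > /tmp/metastore.log   2>&1 &
    nohup hive --service hiveserver2 > /tmp/hiveserver2.log 2>&1 &

    # quick check
    hive -e "show databases;"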
    

    3.3 Zookeeper Installation

    1.zoo.cfg
    tickTime=2000
    initLimit=10
    syncLimit=5
    dataDir=/opt/module/zookeeper-3.5.7/zkData
    clientPort=2181
    
    # the 3 ensemble members
    server.1=hadoop101:2888:3888
    server.2=hadoop102:2888:3888
    server.3=hadoop103:2888:3888
    
    2. Create the Zookeeper data directory
    mkdir -p /opt/module/zookeeper-3.5.7/zkData
    
    3. Under /opt/module/zookeeper-3.5.7/zkData, create a myid file identifying the current host
    echo "1" > /opt/module/zookeeper-3.5.7/zkData/myid
    
    4. Configure environment variables in /etc/profile
    #Zookeeper
    export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7
    export PATH=$PATH:$ZOOKEEPER_HOME/bin
    
    5. Distribute Zookeeper to the other nodes; note that the myid on each Zookeeper node must be unique (see the sketch below).
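
    A sketch of assigning the remaining myid values and starting/checking the ensemble (each myid must match the server.N entry in zoo.cfg):

    echo "2" > /opt/module/zookeeper-3.5.7/zkData/myid   # on hadoop102
    echo "3" > /opt/module/zookeeper-3.5.7/zkData/myid   # on hadoop103

    # on every node
    zkServer.sh start
    zkServer.sh status   # one node should report Mode: leader, the others Mode: follower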

    3.4 HBase Installation

    1.hbase-env.sh
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    
    # Whether to use HBase's built-in Zookeeper; set to false to use the Zookeeper configured earlier
    export HBASE_MANAGES_ZK=false
    
    2.hbase-site.xml
      <!-- Directory where HBase stores its data on HDFS -->
      <property>
        	<name>hbase.rootdir</name>
       	<value>hdfs://hadoop101:9000/hbase</value>
      </property>
      <!-- Run in fully distributed (cluster) mode -->
      <property>
        	<name>hbase.cluster.distributed</name>
        	<value>true</value>
      </property>
      <property>
    	<name>hbase.tmp.dir</name>
    	<value>/opt/module/hbase-2.2.3/tmp</value>
      </property>
      <!-- Zookeeper quorum -->
      <property>
        	<name>hbase.zookeeper.quorum</name>
        	<value>hadoop101,hadoop102,hadoop103</value>
      </property>
      <property>
            <name>hbase.zookeeper.property.clientPort</name>
            <value>2181</value>
      </property>
      <!-- Zookeeper dataDir -->
      <property>
        	<name>hbase.zookeeper.property.dataDir</name>
        	<value>/opt/module/zookeeper-3.5.7/zkData</value>
      </property>
      <property>
            <name>zookeeper.znode.parent</name>
            <value>/hbase</value>
      </property>
    
      <property>
           <name>hbase.unsafe.stream.capability.enforce</name>
           <value>false</value>
       </property>
    
    3.regionservers
    hadoop101
    hadoop102
    hadoop103
    
    4. Configure environment variables in /etc/profile
    #HBase
    export HBASE_HOME=/opt/module/hbase-2.2.3
    export PATH=$PATH:$HBASE_HOME/bin
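
    With HDFS and Zookeeper running, a minimal sketch of starting HBase from the intended HMaster node (hadoop101 is assumed here) and verifying it:

    start-hbase.sh

    # HMaster / HRegionServer should show up in jps; the shell should answer a status query
    jps
    echo "status" | hbase shell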
    

    3.5 Kafka Installation

    1.server.properties
    broker.id=0
    log.dirs=/opt/module/kafka_2.11-2.4.0/logs
    zookeeper.connect=hadoop101:2181,hadoop102:2181,hadoop103:2181/kafka
    
    2. Distribute Kafka to the other Kafka nodes; note that broker.id in server.properties must be globally unique.
    3. Configure environment variables in /etc/profile
    #KAFKA_HOME
    export KAFKA_HOME=/opt/module/kafka_2.11-2.4.0
    export PATH=$PATH:$KAFKA_HOME/bin
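
    A sketch of starting each broker and creating a test topic. Zookeeper must already be running; the /kafka chroot comes from zookeeper.connect above, the default broker port 9092 is assumed, and the topic name is arbitrary:

    # on every Kafka node
    kafka-server-start.sh -daemon /opt/module/kafka_2.11-2.4.0/config/server.properties

    # create and list a test topic
    kafka-topics.sh --create --bootstrap-server hadoop101:9092 --replication-factor 3 --partitions 3 --topic test
    kafka-topics.sh --list --bootstrap-server hadoop101:9092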
    
    3.6 Spark Installation
    1.spark-env.sh
    export JAVA_HOME=/opt/module/jdk1.8.0_121
    
    export SCALA_HOME=/opt/module/scala-2.11.12
    
    export SPARK_MASTER_IP=hadoop101
    
    export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
    
    # spark.history.retainedApplications limits how many applications' UI data are kept in memory (1 here)
    export SPARK_HISTORY_OPTS="-Dspark.history.retainedApplications=1 -Dspark.history.fs.logDirectory=hdfs://hadoop101:9000/spark/log/"
    
    2. spark-defaults.conf
    spark.eventLog.enabled           true
    spark.eventLog.dir               hdfs://hadoop101:9000/spark/log/
    spark.yarn.historyServer.address hadoop102:18080
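
    The HDFS event-log directory referenced above has to exist before the history server or any application writes to it; a one-line sketch:

    hdfs dfs -mkdir -p /spark/log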
    
    3.slaves
    hadoop101
    hadoop102
    hadoop103
    
    4. Create symlinks to core-site.xml, hdfs-site.xml, and hive-site.xml under /opt/module/spark-2.4.5-bin-hadoop2.7/conf
    cd /opt/module/spark-2.4.5-bin-hadoop2.7/conf
    ln -s /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml
    ln -s /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml
    ln -s /opt/module/hive-3.1.2/conf/hive-site.xml
    
    5. Copy mysql-connector-java-5.1.27-bin.jar into /opt/module/spark-2.4.5-bin-hadoop2.7/jars
    cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/spark-2.4.5-bin-hadoop2.7/jars
    
    6. Configure environment variables in /etc/profile
    #Spark
    export SPARK_HOME=/opt/module/spark-2.4.5-bin-hadoop2.7
    export PATH=$PATH:$SPARK_HOME/bin
    
    # Fixes the Spark warning: unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native/:$LD_LIBRARY_PATH
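
    A minimal sketch of starting the standalone master/workers and the history server, then submitting the bundled SparkPi example to YARN as a smoke test (assumes the Spark directory has been distributed to all three nodes):

    # on hadoop101 (SPARK_MASTER_IP): start the master and the workers listed in slaves
    $SPARK_HOME/sbin/start-all.sh

    # on hadoop102 (spark.yarn.historyServer.address): start the history server
    $SPARK_HOME/sbin/start-history-server.sh

    # smoke test against YARN; HADOOP_CONF_DIR is picked up from spark-env.sh
    spark-submit --master yarn --deploy-mode cluster \
      --class org.apache.spark.examples.SparkPi \
      $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.5.jar 10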
    

    4. Summary

    This installation has been tested; everything listed above works.
