1. Download the Hive installation package
2. Enter the conf directory: cp hive-default.xml.template hive-site.xml, then vi hive-site.xml
1) Find the following properties and change their values (in vi you can search for each one, e.g. /javax.jdo.option.ConnectionURL)
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>Gw_sp1226</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://gw-sp.novalocal:3306/hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
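If the hive metastore database has not been created in MySQL yet, MySQL Connector/J accepts the createDatabaseIfNotExist=true URL option, which makes the driver create the database on first connect. A sketch of the ConnectionURL value under that assumption (an optional tweak, not part of the original setup):

<value>jdbc:mysql://gw-sp.novalocal:3306/hive?createDatabaseIfNotExist=true</value>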
3. cp hive-env.sh.template hive-env.sh, vi hive-env.sh
export HADOOP_HOME=/home/hadoop/hadoop                        ## Hadoop installation path
export HIVE_CONF_DIR=/home/hadoop/hive-2.1.1/conf             ## Hive configuration directory
export HIVE_AUX_JARS_PATH=/usr/hive/apache-hive-2.2.0-bin/lib ## Hive lib directory
4. Copy mysql-connector-java-5.1.7-bin.jar into Hive's lib directory. (Parsing JSON additionally requires two jars, json-serde-1.3.8-jar-with-dependencies.jar and json-udf-1.3.8-jar-with-dependencies.jar; see the Flume-to-Hive section below for details. A copy sketch follows the download link.)
Link: https://pan.baidu.com/s/1suPzGJmtJlsROC6SVpcztQ  Password: zlgg
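A minimal copy sketch, assuming the jars were downloaded to the current directory and Hive lives under /usr/hive/apache-hive-2.2.0-bin (the lib path used in hive-env.sh above):

cp mysql-connector-java-5.1.7-bin.jar /usr/hive/apache-hive-2.2.0-bin/lib/
cp json-serde-1.3.8-jar-with-dependencies.jar /usr/hive/apache-hive-2.2.0-bin/lib/
cp json-udf-1.3.8-jar-with-dependencies.jar /usr/hive/apache-hive-2.2.0-bin/lib/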
Flume to Hive
Goal: receive HTTP requests on port 1084 and store them in Hive. osgiweb2.db is the database created in Hive and periodic_report5 is the table.

1. Flume configuration (flume.conf):
a1.sources=r1
a1.channels=c1
a1.sinks=k1
a1.sources.r1.type=http
a1.sources.r1.bind=0.0.0.0
a1.sources.r1.port=1084
a1.sources.r1.handler=jkong.Test.HTTPSourceDPIHandler
#a1.sources.r1.interceptors=i1 i2
#a1.sources.r1.interceptors.i1.type=regex_filter
#a1.sources.r1.interceptors.i1.regex=\{.*\}
#a1.sources.r1.interceptors.i2.type=timestamp
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=1000
a1.channels.c1.keep-alive=30
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://gw-sp.novalocal:1086/user/hive/warehouse/osgiweb2.db/periodic_report5
a1.sinks.k1.hdfs.fileType=DataStream
a1.sinks.k1.hdfs.writeFormat=Text
a1.sinks.k1.hdfs.rollInterval=0
a1.sinks.k1.hdfs.rollSize=10240
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.idleTimeout=60
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

2. Create the table:
create table periodic_report5(id BIGINT, deviceId STRING, report_time STRING, information STRING) row format serde "org.openx.data.jsonserde.JsonSerDe" WITH SERDEPROPERTIES("id"="$.id","deviceId"="$.deviceId","report_time"="$.report_time","information"="$.information");

2.1 A variant CREATE TABLE that also splits the information field into typed columns (not yet tested; not used for now):
create table periodic_report4(id BIGINT, deviceId STRING, report_time STRING, information STRUCT<actualTime:BIGINT,dpiVersionInfo:STRING,subDeviceInfo:STRING,wanTrafficData:STRING,ponInfo:STRING,eventType:STRING,potsInfo:STRING,deviceInfo:STRING,deviceStatus:STRING>) row format serde "org.openx.data.jsonserde.JsonSerDe" WITH SERDEPROPERTIES("input.invalid.ignore"="true","id"="$.id","deviceId"="$.deviceId","report_time"="$.report_time","requestParams.actualTime"="$.requestParams.actualTime","requestParams.dpiVersionInfo"="$.requestParams.dpiVersionInfo","requestParams.subDeviceInfo"="$.requestParams.subDeviceInfo","requestParams.wanTrafficData"="$.requestParams.wanTrafficData","requestParams.ponInfo"="$.requestParams.ponInfo","requestParams.eventType"="$.requestParams.eventType","requestParams.potsInfo"="$.requestParams.potsInfo","requestParams.deviceInfo"="$.requestParams.deviceInfo","requestParams.deviceStatus"="$.requestParams.deviceStatus");

3. Start Flume (from the Flume root directory):
bin/flume-ng agent --conf ./conf/ -f ./conf/flume.conf --name a1 -Dflume.root.logger=DEBUG,console

4. Start Hive (from the Hive bin directory):
./hive
or, with detailed logging:
./hive -hiveconf hive.root.logger=DEBUG,console
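Once the agent is running, the HTTP source can be smoke-tested with curl. This is only a hypothetical request sketch: the exact payload format is defined by the custom handler jkong.Test.HTTPSourceDPIHandler, so the JSON body below (carrying the four table fields) is an assumption:

curl -X POST -H "Content-Type: application/json" -d '{"id":1,"deviceId":"dev-001","report_time":"2018-06-01 12:00:00","information":"{}"}' http://localhost:1084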
5. Enter the bin directory
./schematool -dbType mysql -initSchema   # initialize the metastore schema
If initialization fails, the likely cause is a MySQL permission problem:
(1) Log in to MySQL with mysql -uroot -p
(2) use mysql;
(3) select user,host,authentication_string from user;
(4) Check whether the root user's host is %; if it is not, run the following statements and check again
(5) update user set host='%' where user='root'; flush privileges;
./schematool -dbType mysql -info   # verify the schema
6. In MySQL, check whether the tables of Hive's metastore database were generated; a quick check is sketched below
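For example, assuming the metastore database is named hive, as in the connection URL above:

mysql> use hive;
mysql> show tables;   -- should list metastore tables such as DBS, TBLS, COLUMNS_V2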
7. Start Hive with ./hive from the bin directory and enter show tables; (as above, ./hive -hiveconf hive.root.logger=DEBUG,console starts Hive with detailed logging)
8. Connect to Hive over JDBC (the URL below targets HiveServer2, which must be running; it is started with bin/hiveserver2)
package com.hive.testHiveJdbc;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveJDBCConnection {
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    // private static String url = "jdbc:hive2://223.105.1.203:1083/default";
    private static String url = "jdbc:hive2://192.168.88.142:10000/osgiweb";
    private static String userName = "hive";
    private static String passWord = "hive";

    public static void main(String[] args) {
        try {
            // Load the HiveServer2 JDBC driver and open a connection
            Class.forName(driverName);
            Connection con = DriverManager.getConnection(url, userName, passWord);
            Statement stmt = con.createStatement();
            String tableName = "periodic_report2";
            String sql = null;
            /*
            sql = "drop table if exists " + tableName;
            stmt.execute(sql);
            // create the table
            sql = "create table " + tableName
                    + " (key string, value string) row format delimited fields terminated by ',' stored as textfile";
            stmt.execute(sql);
            // load data
            String path = "/home/hive_1.txt";
            sql = "load data local inpath '" + path + "' into table " + tableName;
            stmt.execute(sql);
            */
            // query data and print the first two columns of each row
            sql = "select * from " + tableName;
            ResultSet res = stmt.executeQuery(sql);
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
        } catch (ClassNotFoundException e) {
            System.out.println("JDBC driver class not found");
            e.printStackTrace();
        } catch (SQLException e) {
            System.out.println("Hive connection settings are wrong");
            e.printStackTrace();
        }
    }
}
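Before wiring up the Java client, the same connection can be sanity-checked with Beeline, the CLI shipped with Hive. A sketch using the URL and credentials above:

bin/beeline -u "jdbc:hive2://192.168.88.142:10000/osgiweb" -n hive -p hive -e "show tables;"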
9. Maven pom.xml configuration
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.hive</groupId>
  <artifactId>testHiveJdbc</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>testHiveJdbc</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>2.3.3</version>
      <exclusions>
        <exclusion><groupId>org.eclipse.jetty.orbit</groupId><artifactId>javax.servlet</artifactId></exclusion>
        <exclusion><groupId>org.htrace</groupId><artifactId>htrace-core</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-jaxrs</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-xc</artifactId></exclusion>
        <exclusion><groupId>xerces</groupId><artifactId>xercesImpl</artifactId></exclusion>
        <exclusion><groupId>xml-apis</groupId><artifactId>xml-apis</artifactId></exclusion>
        <exclusion><groupId>com.sun.jersey</groupId><artifactId>jersey-client</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-core-asl</artifactId></exclusion>
        <exclusion><groupId>org.codehaus.jackson</groupId><artifactId>jackson-mapper-asl</artifactId></exclusion>
        <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>
        <exclusion><groupId>jdk.tools</groupId><artifactId>jdk.tools</artifactId></exclusion>
        <exclusion><groupId>org.apache.directory.server</groupId><artifactId>apacheds-kerberos-codec</artifactId></exclusion>
        <exclusion><groupId>org.apache.directory.server</groupId><artifactId>apacheds-i18n</artifactId></exclusion>
        <exclusion><groupId>org.apache.directory.api</groupId><artifactId>api-asn1-api</artifactId></exclusion>
        <exclusion><groupId>org.apache.directory.api</groupId><artifactId>api-util</artifactId></exclusion>
        <exclusion><groupId>javax.xml.stream</groupId><artifactId>stax-api</artifactId></exclusion>
        <exclusion><groupId>org.fusesource.leveldbjni</groupId><artifactId>leveldbjni-all</artifactId></exclusion>
        <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-api</artifactId></exclusion>
      </exclusions>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.25</version>
    </dependency>
  </dependencies>
</project>
Reference: https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-JDBC