一、HIVE概览小结
二、HIVE安装
Hive只在一个节点上安装即可 1.上传tar包 2.解压 tar -zxvf hive-0.9.0.tar.gz -C /cloud/ 3.配置mysql metastore(切换到root用户) 配置HIVE_HOME环境变量 rpm -qa | grep mysql rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm 修改mysql的密码 /usr/bin/mysql_secure_installation (注意:删除匿名用户,允许用户远程连接) 登录mysql mysql -u root -p 4.配置hive cp hive-default.xml.template hive-site.xml 修改hive-site.xml(删除所有内容,只留一个<configuration></configuration>) 在<configuration>中添加如下内容: <property> <name>javax.jdo.option.ConnectionURL</name> <value>jdbc:mysql://weekend01:3306/hive?createDatabaseIfNotExist=true</value> <description>JDBC connect string for a JDBC metastore</description> </property> <property> <name>javax.jdo.option.ConnectionDriverName</name> <value>com.mysql.jdbc.Driver</value> <description>Driver class name for a JDBC metastore</description> </property> <property> <name>javax.jdo.option.ConnectionUserName</name> <value>root</value> <description>username to use against metastore database</description> </property> <property> <name>javax.jdo.option.ConnectionPassword</name> <value>root</value> <description>password to use against metastore database</description> </property> 5.安装hive和mysql完成后,将mysql的连接jar包拷贝到$HIVE_HOME/lib目录下 如果出现没有权限的问题,在mysql授权(在安装mysql的机器上执行) mysql -uroot -p #(执行下面的语句 *.*:所有库下的所有表 %:任何IP地址或主机都可以连接) GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION; FLUSH PRIVILEGES; 6.建表(默认是内部表) create table trade_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by ' '; 建分区表 create table td_part(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by ' '; 建外部表 create external table td_ext(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by ' ' location '/td_ext'; 7.创建分区表 普通表和分区表区别:有大量数据增加的需要建分区表 create table book 
(id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by ' '; 分区表加载数据 load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22'); load data local inpath '/root/data.am' into table beauty partition (nation="USA"); select nation, avg(size) from beauty group by nation order by avg(size);
三、HIVE基础
1.上传hive安装包 2.解压 3.配置 3.1安装mysql 查询以前安装的mysql相关包 rpm -qa | grep mysql 暴力删除这个包 rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm 执行命令设置mysql /usr/bin/mysql_secure_installation 将hive添加到环境变量当中 GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION; FLUSH PRIVILEGES; 在hive当中创建两张表 create table trade_detail (id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by ' '; create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by ' '; 将mysql当中的数据直接导入到hive当中 sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by ' ' sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table user_info --hive-import --hive-overwrite --hive-table user_info --fields-terminated-by ' ' 创建一个result表保存前一个sql执行的结果 create table result row format delimited fields terminated by ' ' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account); create table user (id int, name string) row format delimited fields terminated by ' '; 将本地文件系统上的数据导入到HIVE当中 load data local inpath '/root/user.txt' into table user; 创建外部表 create external table stubak (id int, name string) row format delimited fields terminated by ' ' location '/stubak'; 创建分区表 普通表和分区表区别:有大量数据增加的需要建分区表 create table book (id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by ' '; 分区表加载数据 load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');
四、HIVE SQL
set hive.cli.print.header=true; CREATE TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User') COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '