1.创建数据库
hive>create database myhive;
hive>create database if not exists myhive;
hive>show databases;
hive>show databases like '*t*';
说明:hive为创建的数据库生成了相对应的目录(*.db),目录在{hive.metastore.warehouse.dir}属性下,同时,数据库中的表将以目录中的子目录进行存储;default默认数据库除外。
a.自定义修改数据库存放位置,需单独指定(*.db)目录
hive>create database myhive3 location '/user/hive/myhive3.db';
b.对数据库可以增加描述信息(通过comment)
hive>create database if not exists myhive4 comment '创建hive测试库';
c.对数据库添加属性信息
hive>create database myhive5 with dbproperties ('name'='zhangsan','data'='2018-8-14');
d.使用数据库
hive>use myhive;
e.删除数据库
hive>drop database if exists myhive5;
hive>drop database if not exists myhive3 cascade;
说明:cascade表示级联关系;restrict表示限制约束(默认值);
2.修改数据库
除数据库的属性以外,其他信息均不能修改:
hive>alter database myhive5 set dbproperties ('name'='lyr','data'='2018-7-12');
3.创建表
a.创建表
hive>create table hive.student (sid int comment 'num',name string comment 'mingzi') comment 'student_table' tblproperties ('name'='leiyanrui','data'='2-18-8-15');
b.查看表属性
hive>desc hive.student;
hive>desc extended hive.student;
c.拷贝表模型,创建新的表(数据不拷贝)
hive>create table hive.student2 like hive.student;
4.管理表
默认创建的表均为管理表,表达形式{MySQL:hive:TBLS}表的TBL_TYPE字段显示为MANAGER_TABLE;
一般也把管理表称为“内部表”;
内部表特性:将数据move移动到{hive.metastore.warehouse.dir}目录相关的database下;
5.外部表
$hadoop fs -mkdir /data
$hadoop fs -mkdir /data/stocks
$>hadoop fs -put /mnt/hgfs/2.安装环境/data/stocks/stocks.csv /data/stocks
hive>create external table if not exists stocks (exchange1 string,symbol string,ymd string,price_open float,price_high float,price_low float,price_close float,volume int,price_adj_close float) row format delimited fields terminated by ',' location '/data/stocks';
hive>select count(*) from stocks;
6.分区表partition
hive中的分区针对{表组织}进行规划的;
[静态分区]
hive>create table logs(st bigint,line string) partitioned by (dt string,country string) row format delimited fields terminated by ',';
$>gedit logs1.txt
12,ccc
32,aaa
31,ffff
hive>load data inpath '/home/hyxy/logs.txt' into table hive.logs partition (dt='2018-9-8',country='changchun');
hive>load data local inpath '/home/hyxy/logs1.txt' into table hive.logs partition(dt='2018-7-1',country='beijing');
hive>select * from hive.logs;
hive>select * from hive.logs where country='beijing';
查看分区
hive>show partition hive.logs;
注意:静态分区的缺点:针对分区列,手动设置,如果分区数据比较多的话,将会较麻烦。
[动态分区]
hive>create table stocks_partition (exchange1 string,symbol string,ymd string ,price_open,float,price_high,float,price_low,float,price_close float,volume int,price_adj_close float) partition by (year string,month string) row format delimited fields terminated by ',';
hive>set hive.exec.dynamic.partition,mode=nonstrit;
hive>set hive.exec.max.dynamic.partitions=100000;
hive>set hive.exec.max.dynamic.aprtitions.pernode=100000;
hive>insert overwrite table stocks_partitions partition(year,month) select exchange1,symbol,ymd,price_open,price_high,price_low,price_close,volume,price_adj_close,substr(ymd,0,4) as year,substr(ymd,6,2) as month from stocks;
说明:按照year和month进行分区。