Hive之命令
说明:此博客只记录了一些常见的hql,create/select/insert/update/delete这些基础操作是没有记录的。
一、时间级
select day -- 时间 ,date_add(day,1 - dayofweek(day)) as week_first_day -- 本周第一天_周日 ,date_add(day,7 - dayofweek(day)) as week_last_day -- 本周最后一天_周六 ,date_add(day,1 - case when dayofweek(day) = 1 then 7 else dayofweek(day) - 1 end) as week_first_day -- 本周第一天_周一 ,date_add(day,7 - case when dayofweek(day) = 1 then 7 else dayofweek(day) - 1 end) as week_last_day -- 本周最后一天_周日 ,next_day(day,'TU') as next_tuesday -- 当前日期的下个周二 ,trunc(day,'MM') as month_first_day -- 当月第一天 ,last_day(day) as month_last_day -- 当月最后一天 ,to_date(concat(year(day),'-',lpad(ceil(month(day)/3) * 3 -2,2,0),'-01')) as season_first_day -- 当季第一天 ,last_day(to_date(concat(year(day),'-',lpad(ceil(month(day)/3) * 3,2,0),'-01'))) as season_last_day -- 当季最后一天 ,trunc(day,'YY') as year_first_day -- 当年第一天 ,last_day(add_months(trunc(day,'YY'),12)) as year_last_day -- 当年最后一天 ,weekofyear(day) as weekofyear -- 当年第几周 ,second(day) as second -- 秒钟 ,minute(day) as minute -- 分钟 ,hour(day) as hour -- 小时 ,day(day) as day -- 日期 ,month(day) as month -- 月份 ,lpad(ceil(month(day)/3),2,0) as season -- 季度 ,year(day) as year -- 年份 from ( select '2018-01-02 01:01:01' as day union all select '2018-02-02 02:03:04' as day union all select '2018-03-02 03:05:07' as day union all select '2018-04-02 04:07:10' as day union all select '2018-05-02 05:09:13' as day union all select '2018-06-02 06:11:16' as day union all select '2018-07-02 07:13:19' as day union all select '2018-08-02 08:15:22' as day union all select '2018-09-02 09:17:25' as day union all select '2018-10-02 10:19:28' as day union all select '2018-11-02 11:21:31' as day union all select '2018-12-02 12:23:34' as day ) t1 ;
1.1 获取年、月、日、小时、分钟、秒、当年第几周
select year('2018-02-27 10:00:00') as year ,month('2018-02-27 10:00:00') as month ,day('2018-02-27 10:00:00') as day ,hour('2018-02-27 10:00:00') as hour ,minute('2018-02-27 10:00:00') as minute ,second('2018-02-27 10:00:00') as second ,weekofyear('2018-02-27 10:00:00') as weekofyear ; +-------+--------+------+-------+---------+---------+-------------+--+ | year | month | day | hour | minute | second | weekofyear | +-------+--------+------+-------+---------+---------+-------------+--+ | 2018 | 2 | 27 | 10 | 0 | 0 | 9 | +-------+--------+------+-------+---------+---------+-------------+--+
1.2 系统时间
-- 获取系统时间 select from_unixtime(unix_timestamp()); select from_unixtime(unix_timestamp(),'yyyy--MM--dd HH:mm:ss') as current_time; -- 获取当天时间(yyyy-MM-dd) select current_date; -- 获取系统时间戳 select unix_timestamp(); -- 时间戳转日期 select from_unixtime(unix_timestamp(),'yyyyMMdd'); -- 日期加减:date_add(时间,增加天数) date_sub(时间,减少天数),返回日期减少天后的日期 select regexp_replace(date_add(FROM_UNIXTIME(UNIX_TIMESTAMP()),-1),'-',''); select regexp_replace(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP()),1),'-',''); -- 日期差值:datediff(结束日期,开始日期),返回结束日期减去开始日期的天数。 select datediff(CURRENT_DATE,'2017-01-01') as datediff; -- 查询当前系统时间(包括毫秒数) select current_timestamp; -- 查询当月第几天 select dayofmonth(current_date); -- 月末 select last_day(current_date); -- 当月第1天 select date_sub(current_date,dayofmonth(current_date)-1);
二、库表级
-- 查看所有库信息 show database; -- 进入指定数据库 use {dbName}; -- 查看指定库的所有表 show tables ; -- 正则表达式过滤表 show tables 'table*'; -- 查看hive表信息 desc formatted {dbName.tabName}; -- 查看hive库信息 describe database extended {dbName}; -- 查询表的某一列 describe {dbName.tabName.fieldName}; -- 查看表结构详情 show create table {dbName.tabName}; -- 查看表的分区 show partitions tablename;
三、字段级
-- 删除hive库以及库中的表 drop database {dbName} CASCADE; -- hive增加主键 alter table {tabName} set TBLPROPERTIES('PRIMARY_KEY'='field1,field2,field3'); -- 添加字段 alter table {dbName.tabName} add columns(column_1 int,column_2 string); -- 修改字段名称 alter table {dbName.tabName} change column_1 new_column_1 int; -- 删除字段(hive暂不支持) alter table {dbName.tabName} drop columns column_1; -- 修改字段注释 alter table {dbName.tabName} change column column_1 column_1 string COMMENT '字段注释';
四、其他
-- 字符串拼接 select concat('c1','c2') as c3; -- 查看数据库中所有的表 show tables in {dbName}; -- 添加jar包 add jar hdfs://testUDF.jar; -- 创建临时函数 create temporary function dbName.funName as '{com.libt.testUDF}'; -- 创建永久函数 create function dbName.funName as '{com.libt.testUDF}';
五、优化
-- 设置执行引擎 set hive.execution.engine=mr; -- 设置队列 set mapreduce.job.queuename=dev; --开启动态分区 set hive.exec.dynamic.partition=true; --允许所有分区为动态分区 set hive.exec.dynamic.partition.mode=nonstrict; - analyze 普通表 analyze table dbName.tableName compute statistics; - analyze 分区表 analyze table dbName.tableName partition(ds) compute statistics; - 重新计算元数据 msck repair table tableName;