1. create table
创建一张目标表,指定分隔符和存储格式:
-- Target table stored as delimited text files.
--   * Fields within a row are separated by a comma.
--   * Rows are separated by a newline; Hive accepts only '\n' as the
--     line terminator for ROW FORMAT DELIMITED.
--   * 'serialization.null.format' = '' makes NULL values serialize as an
--     empty string.
CREATE TABLE tmp_2 (
    resource_id BIGINT,
    v INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\,'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
TBLPROPERTIES ('serialization.null.format' = '');
//ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' ---这里设置字段间以逗号分隔;
//LINES TERMINATED BY '\n' ---这里设置行与行之间以换行符分隔;
//STORED AS TEXTFILE ---指定文件以 text 形式存储;Hive 中默认有三个文件格式:TextFile、SequenceFile 以及 RCFile
//在建表的时候还可以通过"PARTITIONED BY(file STRING)"指定分区字段
textfile 是以文本文件格式存在,利于python/java进行数据处理;
sequencefile 是 Hadoop 的二进制键值对文件格式,支持压缩存储;
rcfile 也比较常用
parquet是列式存储
-- Update the table's SerDe properties so that NULL values are serialized as
-- an empty string (''), which saves storage space.
ALTER TABLE hive_tb
SET SERDEPROPERTIES ('serialization.null.format' = '');
drop table if exists sa_base_order; create table IF NOT EXISTS sa_base_order...
使用上面两个判断(IF EXISTS / IF NOT EXISTS),能够避免因表已存在(或不存在)而报错。
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User', country STRING COMMENT 'country of origination' ) COMMENT 'This is the staging page view table' ROW FORMAT DELIMITED FIELDS TERMINATED BY '