Hive行列转换
1、行转列 (根据主键,进行多行合并一列)
使用函数:concat_ws(‘,’,collect_set(column))
- collect_list 不去重
- collect_set 去重
- column 的数据类型要求是 string
1.1、构建测试数据
vi row_to_col.txt
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
a b 2
a b 3
c d 4
c d 5
c d 6
1.2、建表
create table tmp_jiangzl_test
(col1 string,
col2 string,
col3 string)
row format delimited fields terminated by ' '
stored as textfile;
(col1 string,
col2 string,
col3 string)
row format delimited fields terminated by ' '
stored as textfile;
1.3、加载数据
load data local inpath '/root/test-temp/row_to_col.txt' into table tmp_jiangzl_test;
1.4、执行转换
select col1,col2,concat_ws(',',collect_set(col3)) as bian
from tmp_jiangzl_test
group by col1,col2;
from tmp_jiangzl_test
group by col1,col2;
2、列转行 (对某列拆分,一列拆多行)
使用函数:lateral view explode(split(column, ‘,’)) num
2.1、构建测试数据
vi col_to_row.txt
a b 1,2,3
c d 4,5,6
c d 4,5,6
2.2、建表
create table col_to_row_test(
col1 string,
col2 string,
col3 string)
row format delimited fields terminated by ' '
stored as textfile;
col1 string,
col2 string,
col3 string)
row format delimited fields terminated by ' '
stored as textfile;
2.3、加载数据
load data local inpath '/root/test-temp/col_to_row.txt' into table col_to_row_test;
2.4、执行转换
select col1, col2, col3_new
from col_to_row_test a
lateral view explode(split(col3,',')) b AS col3_new;
from col_to_row_test a
lateral view explode(split(col3,',')) b AS col3_new;