-- Hive SQL cheat sheet; the statements are meant to be run from Hue.
-- `table_name`, `a_name`, `b_name` and `table_name_A` are placeholder names:
-- substitute real tables before running any snippet.

-- ---------------------------------------------------------------------------
-- Import data via Hue
-- ---------------------------------------------------------------------------
-- Step 1: create a single-column table whose rows are parsed by the OpenCSV SerDe.
CREATE TABLE demo_id (`id` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde';

-- Step 2: load the CSV file at the given path into that table.
LOAD DATA INPATH '/user/demo.csv' INTO TABLE demo_id;

-- ---------------------------------------------------------------------------
-- Filter rows
-- ---------------------------------------------------------------------------
-- NOTE(review): Hive processes backslash escapes in string literals before the
-- pattern reaches the regex engine, so `\$` may need to be written `\\$` (or
-- `[$]`) to match a literal dollar sign -- confirm against the target cluster.
-- NOTE(review): the `\0` near the end of this pattern looks like a typo
-- (possibly `\_0` or `\_`); left as written, confirm the intended ID format.
CREATE TABLE `table_name` AS
SELECT
    a.id,
    a.name,
    a.time
FROM `a_name` AS a
WHERE a.type = 'A'
  AND a.year = '2018'
  AND a.month IN ('01', '02', '03')
  AND a.idstr REGEXP '^\$\_[0-9][0-9]\0.+';

-- ---------------------------------------------------------------------------
-- Join two tables
-- ---------------------------------------------------------------------------
-- This is an inner join: only ids present in both tables are kept.
-- The negative lookahead (?!01) rejects idstr values that continue with "01"
-- right after the "$_" prefix.
CREATE TABLE `table_name` AS
SELECT
    b.id,
    b.name,
    b.month
FROM `a_name` AS a
INNER JOIN `b_name` AS b
    ON a.id = b.id
WHERE b.idstr REGEXP '^\$\_(?!01).+';

-- ---------------------------------------------------------------------------
-- Aggregate with GROUP BY
-- ---------------------------------------------------------------------------
-- Every non-aggregated column in the SELECT list must appear in GROUP BY,
-- so a.name is grouped alongside a.str and a.id.
CREATE TABLE `table_name` AS
SELECT
    a.id,
    a.name,
    MIN(a.month) AS min_month
FROM `a_name` AS a
WHERE a.str = '你好'
  AND a.m > '05'
GROUP BY
    a.str,
    a.id,
    a.name;

-- ---------------------------------------------------------------------------
-- Rank rows inside groups
-- ---------------------------------------------------------------------------
-- Rows are partitioned by deviceid and ordered by time (newest first) inside
-- each partition; ROW_NUMBER() labels that order, and RNO < 7 keeps the first
-- six rows of every partition. The RNO column is part of the created table.
CREATE TABLE `table_name` AS
SELECT *
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY t.deviceid ORDER BY t.time DESC) AS RNO
    FROM `table_name_A` AS t
) AS TEMP
WHERE TEMP.RNO < 7;

-- ---------------------------------------------------------------------------
-- Split a field with substr and count per group
-- ---------------------------------------------------------------------------
-- substr(b.str, 3, 2) takes two characters starting at position 3. The
-- expression is repeated in GROUP BY because it is not an aggregate.
-- The snippet itself does not sort; append an ORDER BY if ordered output is needed.
SELECT
    b.id,
    SUBSTR(b.str, 3, 2) AS str_part,
    COUNT(b.name) AS `count`
FROM `table_name` AS b
GROUP BY
    b.id,
    SUBSTR(b.str, 3, 2);

-- ---------------------------------------------------------------------------
-- Count users, de-duplicated by id
-- ---------------------------------------------------------------------------
SELECT COUNT(DISTINCT a.id)
FROM `table_name` AS a;

-- ---------------------------------------------------------------------------
-- Sort by numeric value
-- ---------------------------------------------------------------------------
-- The ORDER BY clause is the point of this snippet: casting to INT sorts by
-- number rather than by string. The surrounding SELECT is only scaffolding.
SELECT *
FROM `table_name` AS a
ORDER BY CAST(a.number AS INT);

-- ---------------------------------------------------------------------------
-- Regex match with several excluded alternatives
-- ---------------------------------------------------------------------------
-- The WHERE clause is the point of this snippet: after the "$_01_" prefix the
-- negative lookahead rejects values that continue with 0_01, 0_00 or 0_11.
-- NOTE(review): as above, confirm whether the backslash escapes need doubling
-- for the Hive string literal.
SELECT *
FROM `table_name` AS a
WHERE a.str REGEXP '^\$\_01\_(?!0\_01|0\_00|0\_11).+';