将数据导入hive的无分区表,再将无分区表导入hive的有分区表:
-- Back up the existing table before rebuilding it as a partitioned table.
create table tds_package_secinfobk as select * from tds_package_secinfo;

-- Recreate tds_package_secinfo partitioned by risk_label.
-- (fixed: the original ran these statements together without the ';'
-- terminators Hive requires between statements)
CREATE TABLE tds_package_secinfo (
    package_name string,
    program_name string,
    keyhash      string
)
PARTITIONED BY (risk_label string)
ROW FORMAT delimited fields terminated by ' '
LOCATION '/user/hive/warehouse/tds_package_secinfo';

-- Load one labelled file into its static partition.
LOAD DATA INPATH '/user/hive/warehouse/邪教app.txt'
OVERWRITE INTO TABLE tds_package_secinfo
PARTITION (risk_label='邪教app');

-- Copy the non-partitioned table into the partitioned one using
-- dynamic partitioning; raise the partition limits first, since the
-- number of distinct risk_label values is not known in advance.
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.support.concurrency=false;
set mapreduce.job.reduce.slowstart.completedmaps=1;

-- program_name / keyhash are not present in the source table, so they
-- are filled with empty strings; risk_label drives the dynamic partition.
insert into table tds_package_secinfo partition(risk_label)
select package_name, '', '', risk_label
from hue_package_secinfo;
-- (fixed: removed a stray trailing token "hbase_package_secinfo" that was
-- a paste artifact and would be a syntax error)
查询该hive表的包名和标签列:
-- List every package together with its risk label.
SELECT
    package_name,
    risk_label
FROM tds_package_secinfo;
hbase表需要先建表,才能插入数据
建表:
-- Hive table backed by HBase via the HBase storage handler; writes to this
-- table become Puts against the HBase table "hbase_package_secinfo".
CREATE TABLE hbase_package_secinfo (
    package_name string,
    risk_label   string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    -- package_name becomes the HBase row key; risk_label maps to the
    -- column info:risk_label.
    -- (fixed: original misspelled the qualifier as "info:risk_lable")
    "hbase.columns.mapping" = ":key,info:risk_label"
)
TBLPROPERTIES ("hbase.table.name" = "hbase_package_secinfo");
插入数据:
-- NOTE: this statement fails at runtime with "Row length is 0": rows in the
-- __HIVE_DEFAULT_PARTITION__ partition have an empty package_name, which maps
-- to the HBase row key, and HBase rejects zero-length row keys. The corrected
-- statement (with a WHERE filter) appears further down in this file.
insert into hbase_package_secinfo select package_name,risk_label from tds_package_secinfo
-------------------------------------------------建表完成-----------------------------------------------------------
查询语句:
-- Row counts per partition date (newest first) for three engine tables.
SELECT COUNT(*), dt
FROM MDS_ENGINE_VIR_URL
GROUP BY dt
ORDER BY dt DESC;

SELECT COUNT(*), dt
FROM MDS_ENGINE_WARESOURCE_INFO_NEW
GROUP BY dt
ORDER BY dt DESC;

SELECT COUNT(*), dt
FROM MDS_VICTIM_INFO
GROUP BY dt
ORDER BY dt DESC;
--------------------------------------------
异常:
Error: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"package_name":"","program_name":"","keyhash":"","risk_label":"__HIVE_DEFAULT_PARTITION__"} at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:179) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"package_name":"","program_name":"","keyhash":"","risk_label":"__HIVE_DEFAULT_PARTITION__"} at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:507) at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:170) ... 8 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.hadoop.hive.serde2.SerDeException: java.lang.IllegalArgumentException: Row length is 0 at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:731) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:815) at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:815) at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:95) at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:157) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:497) ... 
9 more Caused by: org.apache.hadoop.hive.serde2.SerDeException: java.lang.IllegalArgumentException: Row length is 0 at org.apache.hadoop.hive.hbase.HBaseSerDe.serialize(HBaseSerDe.java:286) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:666) ... 15 more Caused by: java.lang.IllegalArgumentException: Row length is 0 at org.apache.hadoop.hbase.client.Mutation.checkRow(Mutation.java:534) at org.apache.hadoop.hbase.client.Put.<init>(Put.java:110) at org.apache.hadoop.hbase.client.Put.<init>(Put.java:68) at org.apache.hadoop.hbase.client.Put.<init>(Put.java:58) at org.apache.hadoop.hive.hbase.HBaseRowSerializer.serialize(HBaseRowSerializer.java:109) at org.apache.hadoop.hive.hbase.HBaseSerDe.serialize(HBaseSerDe.java:282) ... 16 more
原因:__HIVE_DEFAULT_PARTITION__ 分区中的行 package_name 为空,而 package_name 映射为 HBase 的行键(row key),HBase 不允许长度为 0 的行键(Row length is 0),所以插不进去
解决方法:加上条件:
where risk_label!='__HIVE_DEFAULT_PARTITION__'
所以插入语句应该为:
-- Corrected insert: skip rows from the default partition, whose empty
-- package_name would become an illegal zero-length HBase row key.
INSERT INTO hbase_package_secinfo
SELECT
    package_name,
    risk_label
FROM tds_package_secinfo
WHERE risk_label != '__HIVE_DEFAULT_PARTITION__';
最后删除 risk_label='__HIVE_DEFAULT_PARTITION__' 的那个分区:
-- Drop the now-unneeded default partition from the Hive table.
ALTER TABLE tds_package_secinfo
DROP IF EXISTS PARTITION (risk_label='__HIVE_DEFAULT_PARTITION__');