• Creating Hive partitioned tables


    # Create a partitioned table
    CREATE TABLE if not exists data_center.test_partition (
    id int,
    name string,
    age int
    )
    PARTITIONED BY (date_id string)
    row format delimited fields terminated by ','
    stored as textfile
    -- the LOCATION clause is optional; omit it to use the warehouse default path
    LOCATION
    'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition';
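
    A quick way to confirm the table and its partition column were created as intended:

    SHOW PARTITIONS data_center.test_partition;
    DESCRIBE FORMATTED data_center.test_partition;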

    # Add partitions
    alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06';
    alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07';
    alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08';
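
    When the partition directories already exist on HDFS in the standard date_id=... layout, one MSCK call can register them all instead of one ALTER per day:

    MSCK REPAIR TABLE tmp.soon_tbl_address;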


    # Insert data with compressed output:
    set hive.exec.compress.output=true;
    set mapred.output.compress=true;
    set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
    set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
    SET mapred.output.compression.type=BLOCK;
    insert overwrite table seqfile_table select * from textfile_table;
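
    The statement above assumes a text-format source table and a SequenceFile-format target with matching columns; a minimal sketch (the id/name columns are illustrative, not from the original):

    CREATE TABLE textfile_table (id int, name string)
    row format delimited fields terminated by ','
    stored as textfile;

    CREATE TABLE seqfile_table (id int, name string)
    stored as sequencefile;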

    Column schema of soon_tbl_address (name:type pairs):
    name:pass_address_id,type:string;name:order_id,type:string;name:address_type,type:string;name:receiver,type:string;name:receiver_tel,type:string;name:zip_code,type:string;name:province,type:string;name:city,type:string;name:district,type:string;name:address,type:string;name:x_coordinate,type:string;name:y_coordinate,type:string;name:del_flg,type:string;name:create_time,type:string;name:create_user,type:string;name:update_time,type:string;name:update_user,type:string;name:address_order,type:string;name:midway_order_status,type:string;name:street,type:string;name:order_type,type:string;name:out_order_id,type:string;name:poi_title,type:string

    # Copy a table structure
    create table tmp.soon_tbl_address like default.soon_tbl_address;

    # Copy data from ods.soon_tbl_address into tmp.soon_tbl_address
    insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3;
    insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6;
    insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9;
    insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12;

    Column order of tmp.soon_tbl_address (the partition column date_id comes last):
    pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update,date_id


    # Example partition directory and data-file paths on HDFS:
    /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
    hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0

    # Drop a partition and its data
    ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');
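
    Hive also accepts comparison operators in the partition spec, so a range of partitions can be dropped in one statement; a sketch against the same table:

    ALTER TABLE soon_tbl_address DROP PARTITION (date_id < '2017-06-08');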

    # Delete the data in a table while keeping the table
    insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0;
    truncate table tmp.soon_tbl_address;
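
    TRUNCATE can also clear a single partition instead of the whole table:

    truncate table tmp.soon_tbl_address partition (date_id='2017-06-07');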


    # List files on HDFS
    [ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
    17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    Found 5 items
    drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
    drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
    drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
    drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
    drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08

    # Delete HDFS files and directories recursively
    [ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
    rmr: DEPRECATED: Please use 'rm -r' instead.
    17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
    [ds@master ~]$


    # Export data to a local directory
    INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address;
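
    By default the exported files use Hive's \001 field separator; since Hive 0.11 the export can specify a readable delimiter directly:

    INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    SELECT * FROM tmp.soon_tbl_address;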

    # Load a local .txt file on Linux into a specific table partition
    load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07');
    load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13');

    # Create a table manually
    CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
    id int,
    name string,
    age int
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    stored as textfile;

    # Create an empty table with the same structure
    CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;


    # Change a column's name and type
    ALTER TABLE test_no_partition CHANGE ip id int;
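
    A related form, for reference (the email column here is illustrative, not from the original):

    ALTER TABLE test_no_partition ADD COLUMNS (email string);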

    # Drop a table and its data
    drop table test_no_partition;

    # Delete table data while keeping the table structure
    # (DELETE only works on ACID/transactional tables; on plain tables use INSERT OVERWRITE or TRUNCATE)
    delete from test_no_partition;
    insert overwrite table test_no_partition select id,name,age from test_no_partition where id=3;

    # Grant permissions on a Hive database directory
    hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
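
    chmod is not recursive by default; to cover the tables and files already under the database directory, add -R:

    hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db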


    # Rename an HDFS directory
    hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table

    CREATE TABLE ods.test_table_tmp(
    id int,
    name_path string,
    parent_path string
    )
    row format delimited fields terminated by ','
    stored as textfile
    -- the LOCATION clause is optional
    LOCATION
    'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp';



    # Example data-file path produced by the insert:
    hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a


    # Export to a local file:
    insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;

    # View the contents of an HDFS file
    hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a


    # Staging table used by the incremental updates below (date_id is a regular column here, not a partition)
    CREATE TABLE `tmp.temp_test_partition`(
    `id` int,
    `name` string,
    `age` int,
    `date_id` string)
    row format delimited fields terminated by ','
    stored as textfile;


    # Full DDL of the partitioned table (SHOW CREATE TABLE style output):
    CREATE TABLE `data_center.test_partition`(
    `id` int,
    `name` string,
    `age` int)
    PARTITIONED BY ( `date_id` string)
    ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
    WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',')
    STORED AS INPUTFORMAT
    'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    LOCATION
    'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition'
    TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633');

    # Create a partitioned table manually
    CREATE TABLE if not exists data_center.test_partition (
    id int,
    name string,
    age int
    )
    PARTITIONED BY (date string)
    row format delimited fields terminated by ','
    stored as textfile;



    # Modify partitions
    ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
    ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (date_id='20080808');
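
    The change can be verified with:

    SHOW PARTITIONS data_center.test_partition;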

    # Dynamic-partition overwrite (the partition value comes from the last select column)
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=1000;
    set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
    insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi');


    # Incremental partition update: take new rows from the staging table and keep old rows that have no match
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.max.dynamic.partitions.pernode=1000;
    set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
    insert overwrite table data_center.test_partition partition(date_id)
    select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp
    union all select a.* from data_center.test_partition a
    left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16');
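
    A quick sanity check after the merge is a per-partition row count:

    select date_id, count(*) from data_center.test_partition group by date_id;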


    # Local source file used in the loads below:
    /home/ds/users/wcy/mysql_to_hdfs_update.txt

    CREATE TABLE zjs_cust_endurance_time_copy (
    id int,
    computer_time string,
    user_city_code string,
    avg_patient_time double,
    update_time date
    )
    row format delimited fields terminated by ','
    stored as textfile;

    # Load local txt data into the table
    load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt'
    OVERWRITE into table data_center.zjs_cust_endurance_time_copy;
    Existing data in the target table on HDFS:
    106 2017-06-21 120001 1062.22 2017-05-27
    107 2017-06-21 310001 1387.85 2017-05-27
    108 2017-06-21 111111 100.0 2017-05-27

    Data extracted from MySQL:
    106 2017-04-01 120000 2062.22 2017-04-27
    107 2017-04-01 310000 2387.85 2017-04-27

    Data in the target table on HDFS after the run:
    108 2017-06-21 111111 100.0 2017-05-27
    106 2017-04-01 120000 2062.22 2017-04-27
    107 2017-04-01 310000 2387.85 2017-04-27


    # Table structure originally had no id column; id was added manually afterwards, so it ends up last
    CREATE TABLE zjs_cust_endurance_time_copy (
    computer_time string,
    user_city_code string,
    avg_patient_time double,
    update_time date,
    id int
    )
    row format delimited fields terminated by ','
    stored as textfile;

    Existing data in the target table on HDFS:
    2017-06-21 120001 1062.22 2017-05-27 106
    2017-06-21 310001 1387.85 2017-05-27 107
    2017-06-21 111111 100.0 2017-05-27 108

    Data extracted from the MySQL table:
    2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106
    2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107
    2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108
    2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109

    Data in the target table on HDFS after the run:
    2017-04-01 120000 2062.22 2017-04-27 106
    2017-04-01 310000 2387.85 2017-04-27 107
    2017-04-01 320100 2027.64 2017-04-27 108
    2017-04-01 330100 3763.16 2017-04-27 109


    # Load local txt data into a partitioned table
    CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
    id int,
    computer_time string,
    user_city_code string,
    avg_patient_time double,
    update_time date
    )
    PARTITIONED BY (date string)
    row format delimited fields terminated by ','
    stored as textfile;

    load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt'
    OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21');

    Existing data in the target table on HDFS:
    select * from data_center.zjs_cust_endurance_time_copy_part;
    108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
    106 2017-04-01 222222 99.0 2017-04-27 2017-06-21

    Data extracted from MySQL:
    106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
    107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
    108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0

    # Incremental update of the partitioned table (requires the dynamic-partition settings shown earlier)
    insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date)
    select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date
    from tmp.temp_zjs_cust_endurance_time_copy_part tmp
    union all select a.* from data_center.zjs_cust_endurance_time_copy_part a
    left outer join tmp.temp_zjs_cust_endurance_time_copy_part b
    on a.id=b.id where b.id is null;


    # Download file data from HDFS to the local filesystem
    hadoop fs -get <source path> <local directory>
    hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
    hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
    # Download the file to your own machine (sz comes from the lrzsz package)
    sz /home/ds/mydata/000000_0


    create table data_center.word_category_weights(
    keyword string,
    category_id string,
    weight_auto double,
    category_name string,
    del int
    )
    row format delimited fields terminated by ','
    stored as textfile;

    load data LOCAL INPATH '/home/users/wcy/000000_0'
    OVERWRITE into table data_center.word_category_weights;


    load data LOCAL INPATH '/home/users/wcy/000000_0_more_data'
    OVERWRITE into table data_center.word_category_weights;


    load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result;

    load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo;

    # Start the Hive metastore and HiveServer2 services in the background
    hive --service metastore &
    hive --service hiveserver2 &
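
    Once HiveServer2 is up, clients can connect over JDBC; a typical beeline session (assuming the default port 10000 on master):

    beeline -u jdbc:hive2://master:10000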


    # DataX job log excerpt: the RDBMS writer keeps rolling back batch writes and falling back to row-by-row commits because of "Unknown command"
    UnstructuredStorageReaderUtil - CsvReader using defaults [{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":""","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}], csvReaderConfig is [null]
    2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write, committing one row at a time instead. Cause: Unknown command
    2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
    2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write, committing one row at a time instead. Cause: Unknown command
    2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write, committing one row at a time instead. Cause: Unknown command
    2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT


    # Remove a leftover .hive-staging directory (directories need -rm -r)
    hadoop fs -rm -r hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1

    CREATE TABLE goodsinfo(
    create_time string,
    deal_code bigint , 
    store_code bigint, 
    category_code1 bigint, 
    category_code2 bigint, 
    category_code3 bigint, 
    category_code4 bigint, 
    goods_code bigint, 
    term_code int, 
    term_name string, 
    store_name string, 
    goods_name string, 
    deal_name string, 
    goods_image string, 
    goods_pv int, 
    goods_uv int, 
    goods_pv_total int, 
    time_total int, 
    jump2_pv int, 
    collect_total int, 
    add_total int, 
    pay_total int, 
    pay_amount decimal(10,2))
    row format delimited fields terminated by ','
    stored as textfile;

    CREATE TABLE zjs_user_result_1(
    compute_day string,
    member_id string,
    mobile string,
    uc_user_id string,
    real_name string,
    register_time string,
    order_city_code string,
    city_name string,
    manage_amount double,
    deduction double,
    freight double,
    actual_paid double,
    insurance_amount double,
    complete_orders string,
    deduction_all double,
    freight_all double,
    complete_orders_all string,
    last_complete_time string,
    order_time string,
    cancel_order_before string,
    cancel_order_after string,
    order_nums string,
    invite_peoples string,
    invite_peoples_all string,
    share_orders string,
    share_orders_all string)
    row format delimited fields terminated by ','
    stored as textfile;

• Source: https://www.cnblogs.com/ngy0217/p/10051380.html