• 2.1-2.2 Hive 中数据库(Table、Database)基本操作


    官网文档:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

    一、create table

    1、官方字段

    #
    #
    
    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
      [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
      [COMMENT table_comment]
      [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
      [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
      [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
         ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
         [STORED AS DIRECTORIES]
      [
       [ROW FORMAT row_format] 
       [STORED AS file_format]
         | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
      ]
      [LOCATION hdfs_path]
      [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
      [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
     
    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
      LIKE existing_table_or_view_name
      [LOCATION hdfs_path];
     
    data_type
      : primitive_type
      | array_type
      | map_type
      | struct_type
      | union_type  -- (Note: Available in Hive 0.7.0 and later)
     
    primitive_type
      : TINYINT
      | SMALLINT
      | INT
      | BIGINT
      | BOOLEAN
      | FLOAT
      | DOUBLE
      | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
      | STRING
      | BINARY      -- (Note: Available in Hive 0.8.0 and later)
      | TIMESTAMP   -- (Note: Available in Hive 0.8.0 and later)
      | DECIMAL     -- (Note: Available in Hive 0.11.0 and later)
      | DECIMAL(precision, scale)  -- (Note: Available in Hive 0.13.0 and later)
      | DATE        -- (Note: Available in Hive 0.12.0 and later)
      | VARCHAR     -- (Note: Available in Hive 0.12.0 and later)
      | CHAR        -- (Note: Available in Hive 0.13.0 and later)
     
    array_type
      : ARRAY < data_type >
     
    map_type
      : MAP < primitive_type, data_type >
     
    struct_type
      : STRUCT < col_name : data_type [COMMENT col_comment], ...>
     
    union_type
       : UNIONTYPE < data_type, data_type, ... >  -- (Note: Available in Hive 0.7.0 and later)
     
    row_format
      : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
            [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
            [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
      | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
     
    file_format:
      : SEQUENCEFILE
      | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
      | RCFILE      -- (Note: Available in Hive 0.6.0 and later)
      | ORC         -- (Note: Available in Hive 0.11.0 and later)
      | PARQUET     -- (Note: Available in Hive 0.13.0 and later)
      | AVRO        -- (Note: Available in Hive 0.14.0 and later)
      | JSONFILE    -- (Note: Available in Hive 4.0.0 and later)
      | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
     
    constraint_specification:
      : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
        [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE


    2、建表例子

    例子1、2

    ##################栗子#####################
    
    --------------------------------------------------------------------------------------------
    create table IF NOT EXISTS default.bf_1og_20150913        #在default库下创建一个表,不存在则创建;
    (
        ip string COMMENT 'remote ip address',         #COMMENT:字段注释
        user string, 
        req_url string COMMENT 'user request url'
    )
    COMMENT ' BeiFeng Web Access Logs'            #表注释
    ROW FORMAT DELIMITED FIELDS TERMINATED BY‘ ’        #hive的数据存在hdfs上,此项指定数据文件中列之间的间隔符
    STORED AS TEXTFILE                    #数据格式
    LOCATION '/user/bf/hive/warehouse/bf_log_201501913'        #表的存储路径,可以自己指定
    
    
    
    
    --------------------------------------------------------------------------------------------
    create table IF NOT EXISTS default.bf_1og_20150913_sa 
    AS select ip, req_url from default.bf_log_20150913;    #创建一个表,此表的字段来源于查询另外一个表


    例子3

    ################################
    create table IF NOT EXISTS default.bf_log_20150914
    like default.bf_log_20150913            #根据另外一张表来创建表


    二、演示

    1、建表

    #创建表
    hive (default)> create table IF NOT EXISTS default.bf_1og_20150913(
                  > ip string COMMENT 'remote ip address',
                  > user string, 
                  > req_url string COMMENT 'user request url')
                  > COMMENT 'BeiFeng Web Access Logs'
                  > ROW FORMAT DELIMITED FIELDS TERMINATED BY' '
                  > STORED AS TEXTFILE;
    OK
    Time taken: 0.361 seconds
    
    hive (default)> show tables;
    OK
    tab_name
    bf_1og_20150913
    bf_log
    Time taken: 0.052 seconds, Fetched: 2 row(s)


    2、导入数据

    #########
    hive (default)> load data local inpath '/opt/datas/bf-log.txt' into table default.bf_1og_20150913;
    Copying data from file:/opt/datas/bf-log.txt
    Copying file: file:/opt/datas/bf-log.txt
    Loading data to table default.bf_1og_20150913
    Table default.bf_1og_20150913 stats: [numFiles=1, numRows=0, totalSize=141, rawDataSize=0]
    OK
    Time taken: 0.36 seconds
    
    #########
    hive (default)> select * from default.bf_1og_20150913;
    OK
    bf_1og_20150913.ip    bf_1og_20150913.user    bf_1og_20150913.req_url
    "27.38.5.159"    "-"    "31/Aug/2015:00:04:53
    "27.38.5.159"    "-"    "31/Aug/2015:00:04:37
    "27.38.5.159"    "-"    "31/Aug/2015:00:04:53
    Time taken: 0.156 seconds, Fetched: 3 row(s)


    3、第二种建表例子

    #建表
    hive (default)> create table IF NOT EXISTS default.bf_1og_20150913_sa AS select ip, req_url from default.bf_1og_20150913;
    
    
    #
    hive (default)> show tables;
    OK
    tab_name
    bf_1og_20150913
    bf_1og_20150913_sa
    
    
    #
    hive (default)> select * from default.bf_1og_20150913_sa;
    OK
    bf_1og_20150913_sa.ip    bf_1og_20150913_sa.req_url
    "27.38.5.159"    "31/Aug/2015:00:04:53
    "27.38.5.159"    "31/Aug/2015:00:04:37
    "27.38.5.159"    "31/Aug/2015:00:04:53
    Time taken: 0.028 seconds, Fetched: 3 row(s)


    4、第三种建表例子

    ##
    hive (default)> create table IF NOT EXISTS default.bf_log_20150914 like default.bf_1og_20150913;
    OK
    Time taken: 0.046 seconds
    
    ##
    hive (default)> show tables;                                                                    
    OK
    tab_name
    bf_1og_20150913
    bf_1og_20150913_sa
    bf_log
    bf_log_20150914
    Time taken: 0.013 seconds, Fetched: 4 row(s)
    
    
    #这里是指copy表结构,不copy表数据
    hive (default)> select * from default.bf_log_20150914;
    OK
    bf_log_20150914.ip    bf_log_20150914.user    bf_log_20150914.req_url
    Time taken: 0.029 seconds


    三、Create Database

    DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

    DML:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML

    1、Create Database

    CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
      [COMMENT database_comment]
      [LOCATION hdfs_path]
      [WITH DBPROPERTIES (property_name=property_value, ...)];

    具体:

    ##
    创建
    create database db_name;
    
    create database if not exists db_name;        #标准
    
    #指定HDFS上的存储位置
    create database if not exists db_name location ‘/user/root/hive/warehouse/db_name.db’;
    
    
    ##
    查看
    show databases;
    show databases like 'db_hive*';
    
    desc database extended db_name;
    
    
    ##
    删除
    drop database db_name;
    
    drop database db_name cascade;
    
    drop database if exists db_name;
    
    
    ##
    Alter Database
    ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...);   -- (Note: SCHEMA added in Hive 0.14.0)
     
    ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;   -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
      
    ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
  • 相关阅读:
    大数据HIve
    大数据笔记
    [Leetcode]653.Two Sum IV
    [Leetcode]652.Find Duplicate Subtrees
    [Leetcode]650.2 Keys Keyboard
    [Leetcode]648.Replace Words
    [Leetcode Weekly Contest]173
    [总结]最短路径算法
    [Leetcode]647.Palindromic Substrings
    [Leetcode]646.Maximum Length of Pair Chain
  • 原文地址:https://www.cnblogs.com/weiyiming007/p/10748998.html
Copyright © 2020-2023  润新知