• Hive练习


    一、基础DDL练习

    SHOW DATABASES;
    
    CREATE DATABASE IF NOT EXISTS db1 COMMENT 'Our database db1';
    
    SHOW DATABASES;
    
    DESCRIBE DATABASE db1;
    
    CREATE TABLE db1.table1 (word STRING, count INT);
    
    SHOW TABLES in db1;
    
    DESCRIBE db1.table1;
    
    USE db1;
    
    SHOW TABLES;
    
    SELECT * FROM db1.table1;
    
    DROP TABLE table1;
    
    DROP DATABASE db1;
    
    USE default;

    二、基础DML语句

    创建表
    create table if not exists user_dimension (
     uid STRING,
     name STRING,
     gender STRING,
     birth DATE,
     province STRING
    )ROW FORMAT DELIMITED //按行切分的意思
     FIELDS TERMINATED BY ','  //按逗号分隔的
    查看表信息
    describe user_dimension;
    
    show create table user_dimension;
    
    查看所有表
    show tables;
    
    载入本地数据
    load data local inpath '/home/orco/tempdata/user.data' overwrite into table user_dimension;
    
    载入HDFS上的数据
    load data inpath '/user/orco/practice_1/user.data' overwrite into table user_dimension;
    
    验证
    select * from user_dimension;
    
    查看hive在hdfs上的存储目录
    hadoop fs -ls /warehouse/
    hadoop fs -ls /warehouse/user_dimension

    三、复杂数据类型

    示例2:
    CREATE TABLE IF NOT EXISTS employees (
       name         STRING,
       salary       FLOAT,
       subordinates ARRAY<STRING>,
       deductions   MAP<STRING, FLOAT>,
       address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
     )
     ROW FORMAT DELIMITED
     FIELDS TERMINATED BY '01' 
     COLLECTION ITEMS TERMINATED BY '02' 
     MAP KEYS TERMINATED BY '03'
     LINES TERMINATED BY '
    '
     STORED AS TEXTFILE; //最后这一行,是默认,可以不写
    
    载入数据
    load data local inpath ' /home/orco/tempdata/data/employees.txt' overwrite into table employees ;
    
    查询数据
    SELECT name, deductions['Federal Taxes'] FROM employees WHERE deductions['Federal Taxes'] > 0.2;
    
    SELECT name, deductions['Federal Taxes'] FROM employees WHERE deductions['Federal Taxes'] > cast( 0.2 as float);
    
    SELECT name FROM employees WHERE subordinates[1] = 'Todd Jones';
    
    SELECT name, address FROM employees WHERE address.street RLIKE '^.*(Ontario|Chicago).*$';

    四、数据模型-分区

    为减少不必要的暴力数据扫描,可以对表进行分区,为避免产生过多小文件,建议只对离散字段进行分区

    建表
    CREATE TABLE IF NOT EXISTS stocks (
         ymd             DATE,
         price_open      FLOAT, 
         price_high      FLOAT,
         price_low       FLOAT,
         price_close     FLOAT,
         volume            INT,
         price_adj_close FLOAT
         )
         PARTITIONED BY (exchanger STRING, symbol STRING)
         ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
    
    载入数据
    load data local inpath '/home/orco/resources/apache-hive-2.1.1-bin/hivedata/stocks/NASDAQ/AAPL/stocks.csv' overwrite into table stocks partition(exchanger="NASDAQ", symbol="AAPL");
    
    show partitions stocks;
    
    load data local inpath '/home/orco/resources/apache-hive-2.1.1-bin/hivedata/stocks/NASDAQ/INTC/stocks.csv' overwrite into table stocks partition(exchanger="NASDAQ", symbol="INTC");
    
    load data local inpath '/home/orco/resources/apache-hive-2.1.1-bin/hivedata/stocks/NYSE/GE/stocks.csv' overwrite into table stocks partition(exchanger="NYSE", symbol="GE");
    
    show partitions stocks;
    
    查询
    SELECT * FROM stocks WHERE exchanger = 'NASDAQ' AND symbol = 'AAPL' LIMIT 10;
    
    SELECT ymd, price_close FROM stocks WHERE exchanger = 'NASDAQ' AND symbol = 'AAPL' LIMIT 10;
    
    查看HDFS文件目录
    hadoop fs -ls /warehouse/stocks/
    
    hadoop fs -ls /warehouse/stocks/exchanger=NASDAQ
    
    hadoop fs -ls /warehouse/stocks/exchanger=NASDAQ/symbol=AAPL

    六、外部表

    external关键字,删除表时,外部表只删除元数据,不删除数据,更加安全

    数据
    hadoop fs -put stocks /user/orco/
    
    创建外部表
    CREATE EXTERNAL TABLE IF NOT EXISTS stocks_external (
      ymd             DATE,
      price_open      FLOAT, 
      price_high      FLOAT,
      price_low       FLOAT,
      price_close     FLOAT,
      volume            INT,
      price_adj_close FLOAT
      )
      PARTITIONED BY (exchanger STRING, symbol STRING)
      ROW FORMAT DELIMITED 
      FIELDS TERMINATED BY ','
      LOCATION '/user/orco/stocks';
    
    select * from stocks_external;
    
    载入数据
    alter table stocks_external add partition(exchanger="NASDAQ", symbol="AAPL") location '/user/orco/stocks/NASDAQ/AAPL/'
    
    show partitions stocks_external;
    
    select * from stocks_external limit 10;
    
    alter table stocks_external add partition(exchanger="NASDAQ", symbol="INTC") location '/user/orco/stocks/NASDAQ/INTC/';
    
    alter table stocks_external add partition(exchanger="NYSE", symbol="IBM") location '/user/orco/stocks/NYSE/IBM/';
    
    alter table stocks_external add partition(exchanger="NYSE", symbol="GE") location '/user/orco/stocks/NYSE/GE/';
    
    show partitions stocks_external;
    
    查询
    SELECT * FROM stocks_external WHERE exchanger = 'NASDAQ' AND symbol = 'AAPL' LIMIT 10;
    
    SELECT ymd, price_close FROM stocks_external WHERE exchanger = 'NASDAQ' AND symbol = 'AAPL' LIMIT 10;
    
    select exchanger, symbol,count(*) from stocks_external group by exchanger, symbol;
    
    select exchanger, symbol, max(price_high) from stocks_external group by exchanger, symbol;
    
    删除表
    删除内部表stocks
    drop table stocks;
    
    查看HDFS上文件目录
    hadoop fs -ls /warehouse/
    
    删除外部表stocks_external
    drop table stocks_external;
    
    查看HDFS上文件目录
    hadoop fs -ls /user/orco
    
    hadoop fs -ls /user/stocks

    七、列式存储

    在Create/Alter表的时候,可以为表以及分区的文件指定不同的格式
    • Storage Formats
    • Row Formats
    • SerDe

    STORED AS file_format
    – STORED AS PARQUET
    – STORED AS ORC
    – STORED AS SEQUENCEFILE
    – STORED AS AVRO
    – STORED AS TEXTFILE

    列式存储格式ORC与Parquet:存储空间

    列式存储格式ORC与Parquet:性能

    如何创建ORC表

    create table if not exists record_orc (
      rid STRING,
      uid STRING,
      bid STRING,
      price INT,
      source_province STRING,
      target_province STRING,
      site STRING,
      express_number STRING,
      express_company STRING,
      trancation_date DATE
     )
     stored as orc;
    
    show create table record_orc;
    
    载入数据
    select * from record_orc limit 10;
    
    insert into table record_orc select * from record;
    
    select * from record_orc limit 10;

    八、Lateral View,行转多列

    CREATE TABLE IF NOT EXISTS employees (
       name         STRING,
       salary       FLOAT,
       subordinates ARRAY<STRING>,
       deductions   MAP<STRING, FLOAT>,
       address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
     )
     ROW FORMAT DELIMITED
     FIELDS TERMINATED BY '01' 
     COLLECTION ITEMS TERMINATED BY '02' 
     MAP KEYS TERMINATED BY '03'
     LINES TERMINATED BY '
    '
     STORED AS TEXTFILE;
    
    查询
    select name,subordinate from employees LATERAL VIEW explode(subordinates) subordinates_table AS subordinate;

    九、explain

  • 相关阅读:
    php pdf添加水印(中文水印,图片水印)
    论文阅读---Reducing the Dimensionality of Data with Neural Networks
    Deep Learning综述[下]
    Install-Package:QRCoder已拥有为System.Drawing.Common定义的依赖项
    linux和windows之间传递文件
    IPV6修复工具
    Deep Learning综述[上]
    novaclient开发中遇到的问题小结
    easybcd删除win10启动项如何恢复?
    uefi+gpt安装双系统
  • 原文地址:https://www.cnblogs.com/sorco/p/7017888.html
Copyright © 2020-2023  润新知