• pig和mysql脚本对比


    测试数据位于:/home/hadoop/luogankun/workspace/sync_data/pig

    dept和emp表来源自oracle数据库自带的表

    dept.txt

    10      ACCOUNTING      NEW YORK
    20      RESEARCH        DALLAS
    30      SALES   CHICAGO
    40      OPERATIONS      BOSTON

    emp.txt

    7369    SMITH   CLERK   7902    1980-12-17      800.00          20
    7499    ALLEN   SALESMAN        7698    1981-2-20       1600.00 300.00  30
    7521    WARD    SALESMAN        7698    1981-2-22       1250.00 500.00  30
    7566    JONES   MANAGER 7839    1981-4-2        2975.00         20
    7654    MARTIN  SALESMAN        7698    1981-9-28       1250.00 1400.00 30
    7698    BLAKE   MANAGER 7839    1981-5-1        2850.00         30
    7782    CLARK   MANAGER 7839    1981-6-9        2450.00         10
    7788    SCOTT   ANALYST 7566    1987-4-19       3000.00         20
    7839    KING    PRESIDENT               1981-11-17      5000.00         10
    7844    TURNER  SALESMAN        7698    1981-9-8        1500.00 0.00    30
    7876    ADAMS   CLERK   7788    1987-5-23       1100.00         20
    7900    JAMES   CLERK   7698    1981-12-3       950.00          30
    7902    FORD    ANALYST 7566    1981-12-3       3000.00         20
    7934    MILLER  CLERK   7782    1982-1-23       1300.00         10

    上传数据到HDFS系统中

    cd /home/hadoop/luogankun/workspace/sync_data/pig
    hadoop fs -put dept.txt input/pig/dept.txt
    hadoop fs -put emp.txt input/pig/emp.txt

    导入

    CREATE TABLE TMP_TABLE(USER VARCHAR(32),AGE INT,IS_MALE BOOLEAN);
    
    dept= LOAD 'input/pig/dept.txt' USING PigStorage('	') AS (deptno:int,dname:chararray,loc:chararray);
    emp = LOAD 'input/pig/emp.txt' USING PigStorage('	') AS (empno:int,ename:chararray, job:chararray, mgr:int, hiredate:chararray,sal:double,comm:double,dept:int);

    查看表结构

    desc TMP_TABLE;
    describe dept dept: {deptno:
    int,dname: chararray,loc: chararray} describe emp emp: {empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int}

    查询整张表

    SELECT * FROM TMP_TABLE;
    DUMP dept (10,ACCOUNTING,NEW YORK) (20,RESEARCH,DALLAS) (30,SALES,CHICAGO) (40,OPERATIONS,BOSTON) DUMP emp (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

    查询前N条

    SELECT * FROM TMP_TABLE LIMIT 10;
    emp_table_limit
    = LIMIT emp 10; DUMP emp_table_limit; (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)

    查询表中的某些列

    SELECT USER FROM TMP_TABLE;
    emp_table_empno_ename
    = FOREACH emp GENERATE empno, ename; describe emp_table_empno_ename emp_table_empno_ename: {empno: int,ename: chararray} DUMP emp_table_empno_ename; (7369,SMITH) (7499,ALLEN) (7521,WARD) (7566,JONES) (7654,MARTIN) (7698,BLAKE) (7782,CLARK) (7788,SCOTT) (7839,KING) (7844,TURNER) (7876,ADAMS) (7900,JAMES) (7902,FORD) (7934,MILLER)

    给列取别名

    SELECT USER AS USER_NAME,AGE AS USER_AGE FROM TMP_TABLE;
    emp_table_column_alias
    = FOREACH emp GENERATE empno AS id,ename AS name; describe emp_table_column_alias emp_table_column_alias: {id: int,name: chararray} DUMP emp_table_column_alias (7369,SMITH) (7499,ALLEN) (7521,WARD) (7566,JONES) (7654,MARTIN) (7698,BLAKE) (7782,CLARK) (7788,SCOTT) (7839,KING) (7844,TURNER) (7876,ADAMS) (7900,JAMES) (7902,FORD) (7934,MILLER)

    排序

    SELECT * FROM TMP_TABLE ORDER BY AGE;
    emp_table_order
    = ORDER emp BY empno ASC; DUMP emp_table_order; (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

    条件查询

    SELECT * FROM TMP_TABLE WHERE AGE>20;
    emp_table_where
    = FILTER emp by sal > 1500; DUMP emp_table_where; (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)

    内连接Inner Join

    SELECT * FROM TMP_TABLE A JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
    emp_table_inner_join
    = JOIN emp BY dept,dept BY deptno; describe emp_table_inner_join emp_table_inner_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int, emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int, dept::deptno: int,dept::dname: chararray,dept::loc: chararray} DUMP emp_table_inner_join; (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

    左连接Left Join

    SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
    emp_table_left_join
    = JOIN emp BY dept LEFT OUTER,dept BY deptno; describe emp_table_left_join emp_table_left_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int, emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int, dept::deptno: int,dept::dname: chararray,dept::loc: chararray} DUMP emp_table_left_join; (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

    右连接Right Join

    SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
    emp_table_right_join
    = JOIN emp BY dept RIGHT OUTER,dept BY deptno; describe emp_table_right_join emp_table_right_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int, emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int, dept::deptno: int,dept::dname: chararray,dept::loc: chararray} DUMP emp_table_right_join; (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO) (,,,,,,,,40,OPERATIONS,BOSTON)

    全连接Full Join

    SELECT * FROM TMP_TABLE A  JOIN TMP_TABLE_2 B ON A.AGE=B.AGE
    emp_table_full_join
    = JOIN emp BY dept FULL OUTER,dept BY deptno; describe emp_table_full_join emp_table_full_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int, emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int, dept::deptno: int,dept::dname: chararray,dept::loc: chararray} DUMP emp_table_full_join; (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO) (,,,,,,,,40,OPERATIONS,BOSTON)

    同时对多张表交叉查询

    SELECT * FROM TMP_TABLE,TMP_TABLE_2;
    emp_table_cross
    = CROSS emp,dept; describe emp_table_cross emp_table_cross: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int, emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int, dept::deptno: int,dept::dname: chararray,dept::loc: chararray} DUMP emp_table_cross; (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,10,ACCOUNTING,NEW YORK) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,30,SALES,CHICAGO) (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,40,OPERATIONS,BOSTON) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,10,ACCOUNTING,NEW YORK) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,20,RESEARCH,DALLAS) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO) (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,40,OPERATIONS,BOSTON) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,10,ACCOUNTING,NEW YORK) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,20,RESEARCH,DALLAS) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,40,OPERATIONS,BOSTON) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,10,ACCOUNTING,NEW YORK) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,30,SALES,CHICAGO) (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,40,OPERATIONS,BOSTON) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,10,ACCOUNTING,NEW YORK) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,20,RESEARCH,DALLAS) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,40,OPERATIONS,BOSTON) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,10,ACCOUNTING,NEW YORK) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,20,RESEARCH,DALLAS) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO) (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,40,OPERATIONS,BOSTON) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,20,RESEARCH,DALLAS) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,30,SALES,CHICAGO) (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,40,OPERATIONS,BOSTON) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,10,ACCOUNTING,NEW YORK) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,30,SALES,CHICAGO) (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,40,OPERATIONS,BOSTON) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,20,RESEARCH,DALLAS) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,30,SALES,CHICAGO) (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,40,OPERATIONS,BOSTON) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,10,ACCOUNTING,NEW YORK) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,20,RESEARCH,DALLAS) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,40,OPERATIONS,BOSTON) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,10,ACCOUNTING,NEW YORK) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,30,SALES,CHICAGO) (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,40,OPERATIONS,BOSTON) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,10,ACCOUNTING,NEW YORK) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,20,RESEARCH,DALLAS) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO) (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,40,OPERATIONS,BOSTON) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,10,ACCOUNTING,NEW YORK) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,30,SALES,CHICAGO) (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,40,OPERATIONS,BOSTON) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,20,RESEARCH,DALLAS) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,30,SALES,CHICAGO) (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,40,OPERATIONS,BOSTON)

    分组GROUP BY

    SELECT * FROM TMP_TABLE GROUP BY IS_MALE;
    emp_table_group
    = GROUP emp BY dept; describe emp_table_group emp_table_group: { group: int, emp: { (empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double, comm: double,dept: int) } } DUMP emp_table_group; (10,{ (7839,KING,PRESIDENT,,1981-11-17,5000.0,,10), (7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10), (7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)}) (20,{ (7369,SMITH,CLERK,7902,1980-12-17,800.0,,20), (7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20), (7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20), (7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20), (7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)}) (30,{ (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30), (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30), (7900,JAMES,CLERK,7698,1981-12-3,950.0,,30), (7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30), (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30), (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)})

    分组并统计

    SELECT IS_MALE,COUNT(*) FROM TMP_TABLE GROUP BY IS_MALE;
    emp_table_group_count
    = GROUP emp BY dept; describe emp_table_group_count emp_table_group_count: { group: int, emp: { (empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int ) } } emp_table_group_count = FOREACH emp_table_group_count GENERATE group,COUNT($1); describe emp_table_group_count DUMP emp_table_group_count; (10,3) (20,5) (30,6) emp_table_group_count = FOREACH emp_table_group_count GENERATE emp.dept,COUNT($1); describe emp_table_group_count emp_table_group_count: {{(dept: int)},long} DUMP emp_table_group_count; ({(10),(10),(10)},3) ({(20),(20),(20),(20),(20)},5) ({(30),(30),(30),(30),(30),(30)},6)

    去重DISTINCT

    SELECT DISTINCT IS_MALE FROM TMP_TABLE;
    emp_table_distinct
    = FOREACH emp GENERATE dept; describe emp_table_distinct emp_table_distinct: {dept: int} emp_table_distinct = DISTINCT emp_table_distinct; describe emp_table_distinct emp_table_distinct: {dept: int} DUMP emp_table_distinct; (10) (20) (30)

    pig判空

    select * from emp where comm is not null;
    emp_table_where_null
    = FILTER emp by comm is not null; DUMP emp_table_where_null; (7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30) (7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30) (7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30) (7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
  • 相关阅读:
    牛客小白月赛21
    牛客小白月赛21
    CodeForces 1333-C Eugene and an array(子区间和为0、前缀和)
    页面大小、页表项、虚拟地址和物理地址之间的关系(转)
    001-Paint_FreePythonGames项目代码详解(每行都有注释!!!)
    第17讲~第19讲:函数:python的乐高积木
    第16讲:序列!序列!
    第15讲:字符串格式化
    练习23--字符串、字节和编码
    第14讲:字符串--各种奇葩内置方法
  • 原文地址:https://www.cnblogs.com/luogankun/p/3897179.html
Copyright © 2020-2023  润新知