• Spark SQL 01 — reading a JSON file into a DataFrame (Java, Spark 1.x)


    package sql;
    
    
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;
    
    /**
     *
     */
    /**
     * Demonstrates the Spark 1.x DataFrame API in Java: reading a JSON file
     * into a {@code DataFrame} and running basic relational operations
     * (show, printSchema, select, filter, groupBy). Each operation is
     * annotated with the equivalent SQL statement.
     */
    public class DataFrameReadJsonOps2 {

        /**
         * Entry point: builds a local SparkContext, loads people.json as a
         * DataFrame, and prints the results of several relational queries.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            // SparkConf reads system configuration and sets the application name.
            SparkConf conf = new SparkConf().setAppName("DataFrameOps").setMaster("local");
            // JavaSparkContext is the core entry point of the Driver.
            JavaSparkContext sc = new JavaSparkContext(conf);
            try {
                // Reduce console noise: log only warnings and errors.
                sc.setLogLevel("WARN");
                // SQLContext is the Spark 1.x entry point for SQL analysis.
                SQLContext sqlContext = new SQLContext(sc);
                // A DataFrame can be thought of as a table.
                DataFrame df = sqlContext.read().json("c:/resources/people.json");
                // select * from table
                df.show();
                // desc table
                df.printSchema();
                // select name from table
                df.select(df.col("name")).show();
                // select name, age+10 from table
                df.select(df.col("name"), df.col("age").plus(10)).show();
                // select * from table where age > 21
                df.filter(df.col("age").gt(21)).show();
                // select age, count(1) from table group by age
                df.groupBy("age").count().show(); //df.groupBy(df.col("age")).count().show();
            } finally {
                // Fix: the original never stopped the context, leaking the
                // SparkContext (UI, executors). Always release it on exit.
                sc.stop();
            }
        }

    }
    //
    //SLF4J: Class path contains multiple SLF4J bindings.
    //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-examples-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    //SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    //SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
    //Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    //17/12/29 14:15:10 INFO SparkContext: Running Spark version 1.4.0
    //17/12/29 14:15:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    //17/12/29 14:15:28 INFO SecurityManager: Changing view acls to: alamps
    //17/12/29 14:15:28 INFO SecurityManager: Changing modify acls to: alamps
    //17/12/29 14:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(alamps); users with modify permissions: Set(alamps)
    //17/12/29 14:15:37 INFO Slf4jLogger: Slf4jLogger started
    //17/12/29 14:15:39 INFO Remoting: Starting remoting
    //17/12/29 14:15:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.18.3.7:55458]
    //17/12/29 14:15:44 INFO Utils: Successfully started service 'sparkDriver' on port 55458.
    //17/12/29 14:15:45 INFO SparkEnv: Registering MapOutputTracker
    //17/12/29 14:15:46 INFO SparkEnv: Registering BlockManagerMaster
    //17/12/29 14:15:46 INFO DiskBlockManager: Created local directory at C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80blockmgr-660894dd-39d3-4c8a-bf25-ae1d3850953d
    //17/12/29 14:15:46 INFO MemoryStore: MemoryStore started with capacity 467.6 MB
    //17/12/29 14:15:47 INFO HttpFileServer: HTTP File server directory is C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80bhttpd-106ce90e-d496-4e96-a383-b471aeb5a224
    //17/12/29 14:15:47 INFO HttpServer: Starting HTTP Server
    //17/12/29 14:15:48 INFO Utils: Successfully started service 'HTTP file server' on port 55464.
    //17/12/29 14:15:48 INFO SparkEnv: Registering OutputCommitCoordinator
    //17/12/29 14:15:49 INFO Utils: Successfully started service 'SparkUI' on port 4040.
    //17/12/29 14:15:49 INFO SparkUI: Started SparkUI at http://172.18.3.7:4040
    //17/12/29 14:15:49 INFO Executor: Starting executor ID driver on host localhost
    //17/12/29 14:15:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 55483.
    //17/12/29 14:15:50 INFO NettyBlockTransferService: Server created on 55483
    //17/12/29 14:15:50 INFO BlockManagerMaster: Trying to register BlockManager
    //17/12/29 14:15:50 INFO BlockManagerMasterEndpoint: Registering block manager localhost:55483 with 467.6 MB RAM, BlockManagerId(driver, localhost, 55483)
    //17/12/29 14:15:50 INFO BlockManagerMaster: Registered BlockManager
    //+----+-------+
    //| age|   name|
    //+----+-------+
    //|null|Michael|
    //|  30|   Andy|
    //|  19| Justin|
    //+----+-------+
    //
    //root
    // |-- age: long (nullable = true)
    // |-- name: string (nullable = true)
    //
    //+-------+
    //|   name|
    //+-------+
    //|Michael|
    //|   Andy|
    //| Justin|
    //+-------+
    //
    //+-------+----------+
    //|   name|(age + 10)|
    //+-------+----------+
    //|Michael|      null|
    //|   Andy|        40|
    //| Justin|        29|
    //+-------+----------+
    //
    //+---+----+
    //|age|name|
    //+---+----+
    //| 30|Andy|
    //+---+----+
    //
    //+----+-----+
    //| age|count|
    //+----+-----+
    //|null|    1|
    //|  19|    1|
    //|  30|    1|
    //+----+-----+
  • 相关阅读:
    热安装NGINX并支持多站点SSL
    centos 自启动
    gulp入门
    nodejs的包管理器npm和cnpm
    Chrome扩展及应用开发
    dubbo本地搭建实例
    web项目中获取spring的bean对象
    Windows下zookeeper注册中心的安装和启动
    dubbo-admin管理控制台的安装和使用
    启动zookeeper报错:JAVA_HOME is not set
  • 原文地址:https://www.cnblogs.com/alamps/p/8144298.html
Copyright © 2020-2023  润新知