package sql; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.SQLContext; /** * */ public class DataFrameReadJsonOps2 { /** * @param args */ public static void main(String[] args) { //创建SparkConf用于读取系统配置信息并设置当前应用程序的名字 SparkConf conf = new SparkConf().setAppName("DataFrameOps").setMaster("local"); //创建JavaSparkContext对象实例作为整个Driver的核心基石 JavaSparkContext sc = new JavaSparkContext(conf); //设置日志级别为WARN sc.setLogLevel("WARN"); //创建SQLContext上下文对象用于SQL的分析 SQLContext sqlContext = new SQLContext(sc); //创建Data Frame,可以简单的认为DataFrame是一张表 DataFrame df = sqlContext.read().json("c:/resources/people.json"); //select * from table df.show(); //desc table df.printSchema(); //select name from table df.select(df.col("name")).show(); //select name, age+10 from table df.select(df.col("name"), df.col("age").plus(10)).show(); //select * from table where age > 21 df.filter(df.col("age").gt(21)).show(); //select age, count(1) from table group by age df.groupBy("age").count().show(); //df.groupBy(df.col("age")).count().show(); } } // //SLF4J: Class path contains multiple SLF4J bindings. //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-examples-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] //SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. //SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] //Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties //17/12/29 14:15:10 INFO SparkContext: Running Spark version 1.4.0 //17/12/29 14:15:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable //17/12/29 14:15:28 INFO SecurityManager: Changing view acls to: alamps //17/12/29 14:15:28 INFO SecurityManager: Changing modify acls to: alamps //17/12/29 14:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(alamps); users with modify permissions: Set(alamps) //17/12/29 14:15:37 INFO Slf4jLogger: Slf4jLogger started //17/12/29 14:15:39 INFO Remoting: Starting remoting //17/12/29 14:15:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.18.3.7:55458] //17/12/29 14:15:44 INFO Utils: Successfully started service 'sparkDriver' on port 55458. //17/12/29 14:15:45 INFO SparkEnv: Registering MapOutputTracker //17/12/29 14:15:46 INFO SparkEnv: Registering BlockManagerMaster //17/12/29 14:15:46 INFO DiskBlockManager: Created local directory at C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80blockmgr-660894dd-39d3-4c8a-bf25-ae1d3850953d //17/12/29 14:15:46 INFO MemoryStore: MemoryStore started with capacity 467.6 MB //17/12/29 14:15:47 INFO HttpFileServer: HTTP File server directory is C:UsersalampsAppDataLocalTempspark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80bhttpd-106ce90e-d496-4e96-a383-b471aeb5a224 //17/12/29 14:15:47 INFO HttpServer: Starting HTTP Server //17/12/29 14:15:48 INFO Utils: Successfully started service 'HTTP file server' on port 55464. //17/12/29 14:15:48 INFO SparkEnv: Registering OutputCommitCoordinator //17/12/29 14:15:49 INFO Utils: Successfully started service 'SparkUI' on port 4040. //17/12/29 14:15:49 INFO SparkUI: Started SparkUI at http://172.18.3.7:4040 //17/12/29 14:15:49 INFO Executor: Starting executor ID driver on host localhost //17/12/29 14:15:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 55483. //17/12/29 14:15:50 INFO NettyBlockTransferService: Server created on 55483 //17/12/29 14:15:50 INFO BlockManagerMaster: Trying to register BlockManager //17/12/29 14:15:50 INFO BlockManagerMasterEndpoint: Registering block manager localhost:55483 with 467.6 MB RAM, BlockManagerId(driver, localhost, 55483) //17/12/29 14:15:50 INFO BlockManagerMaster: Registered BlockManager //+----+-------+ //| age| name| //+----+-------+ //|null|Michael| //| 30| Andy| //| 19| Justin| //+----+-------+ // //root // |-- age: long (nullable = true) // |-- name: string (nullable = true) // //+-------+ //| name| //+-------+ //|Michael| //| Andy| //| Justin| //+-------+ // //+-------+----------+ //| name|(age + 10)| //+-------+----------+ //|Michael| null| //| Andy| 40| //| Justin| 29| //+-------+----------+ // //+---+----+ //|age|name| //+---+----+ //| 30|Andy| //+---+----+ // //+----+-----+ //| age|count| //+----+-----+ //|null| 1| //| 19| 1| //| 30| 1| //+----+-----+