/**
 * Created by root on 9/7/15.
 *
 * Minimal Spark SQL smoke test (Spark 1.x API): reads a line-delimited JSON
 * file into a DataFrame, exercises basic DataFrame operations (show, schema,
 * select, filter, groupBy/count), then builds a second DataFrame from an
 * in-memory RDD of JSON strings.
 */
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

object SparkSQLTest {
  /**
   * Entry point. Runs the demo in local mode against a hard-coded input path.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = { // explicit `: Unit =` — procedure syntax is deprecated
    val conf = new SparkConf().setAppName("Spark SQL Test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)

      // Create a DataFrame from a JSON file (expects one JSON object per line).
      val df = sqlContext.read.json("/home/slh/data/sqltest.json")
      df.show()

      // Print the inferred schema in a tree format.
      df.printSchema()

      // Select only the "name" column.
      df.select("name").show()

      // Select everybody, but increment the age by 1.
      df.select(df("name"), df("age") + 1).show()

      // Select people older than 13.
      df.filter(df("age") > 13).show()

      // Count people by age.
      df.groupBy("age").count().show()

      // Build a DataFrame from an in-memory RDD of JSON strings; nested
      // objects (here, "address") become nested struct fields in the schema.
      val anotherPeopleRDD = sc.parallelize(
        """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil)
      val anotherPeople = sqlContext.read.json(anotherPeopleRDD)
      anotherPeople.printSchema()
    } finally {
      // Always release cluster resources, even if an action above throws
      // (e.g. the input file is missing). Previously the context leaked.
      sc.stop()
    }
  }
}