• Spark SQL实战


    一、程序

     1 package sparklearning
     2 
     3 import org.apache.log4j.Logger
     4 import org.apache.spark.SparkConf
     5 import org.apache.spark.SparkContext
     6 import org.apache.spark.sql.SQLContext
     7 import org.apache.spark.storage.StorageLevel
     8 import org.apache.log4j.Level
     9 
    /**
     * Spark SQL example: loads the online-trade user and trade-detail text files,
     * exposes them as the temporary tables `user` and `trade`, and prints how many
     * trade records have a `tradeDate` starting with "2016".
     */
    object OnLineTradeStatistics {

      /** One record of the user file; fields are taken in order from a space-split line. */
      case class User(userID: String, gender: String, age: Int, registerDate: String, provice: String, career: String)

      /** One record of the trade-detail file; fields are taken in order from a space-split line. */
      case class TradeDetail(tradeID: String, tradeDate: String, productID: Int, amount: Int, userID: String)

      /**
       * Program entry point.
       *
       * @param args command-line arguments (not used)
       */
      def main(args: Array[String]): Unit = {

        // Silence log output that is not needed for this example.
        Logger.getLogger("org.apache.hadoop").setLevel(Level.ERROR)
        Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
        Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

        // Configure the application.
        val conf = new SparkConf().setAppName("On Line Trade Data").setMaster("local")
        val ctx = new SparkContext(conf)

        // Always stop the SparkContext, even if loading or querying fails,
        // so the resources it holds are released.
        try {
          val sqlCtx = new SQLContext(ctx)
          import sqlCtx.implicits._

          // Read the files: RDD --> DataFrame.
          val userDF = ctx.textFile("/home/hadoop/data/on_line_trade_user.txt")
            .map(_.split(" "))
            .map(u => User(u(0), u(1), u(2).toInt, u(3), u(4), u(5)))
            .toDF()
          userDF.registerTempTable("user")
          userDF.persist(StorageLevel.MEMORY_ONLY_SER)

          val tradeDF = ctx.textFile("/home/hadoop/data/on_line_trade_detail.txt")
            .map(_.split(" "))
            .map(u => TradeDetail(u(0), u(1), u(2).toInt, u(3).toInt, u(4)))
            .toDF()
          tradeDF.registerTempTable("trade") // register as a temporary table
          tradeDF.persist(StorageLevel.MEMORY_ONLY_SER)

          // The value is a row count (not a sum of amounts), so the label says so.
          val countOfTrade2016 = sqlCtx.sql("SELECT * FROM trade where tradeDate like '2016%'").count()
          println("2016 total trade count: " + countOfTrade2016)
        } finally {
          ctx.stop()
        }
      }
    }

    二、结果

  • 相关阅读:
    C语言01
    C++面试总结更新
    Python网络爬虫与信息提取02
    Self-Driving Car 01
    Python网络爬虫与信息提取01
    Python-03
    Shell
    Python-05
    Python-04
    Python-02
  • 原文地址:https://www.cnblogs.com/liuzhongfeng/p/7017545.html
Copyright © 2020-2023  润新知