• sparkSQL实际应用


     提交代码包

    /usr/local/spark/bin$ spark-submit --class "kv" /data/chun/sparktes.jar

    1、查询KV

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._
    import org.apache.spark.SparkConf
    /**
     * Spark job that reads the falcon log files from HDFS, extracts a
     * (key, account) pair from every line, keeps only well-formed pairs and
     * writes them as a single text file to the local filesystem.
     */
    object kv {

      /**
       * Extracts the pair used by the job from one raw log line.
       *
       * The key is the first space-delimited token following `"message":"`;
       * the account is the text between `account: ` and `, args:`.
       * Lines on which a delimiter-only fragment makes `split` return an empty
       * array yield an empty string for that field instead of throwing, so the
       * length filter in `main` simply drops them.
       *
       * @param line one raw log line
       * @return (key, account); either element may be empty or garbage for
       *         lines that do not follow the expected layout
       */
      private def parseLine(line: String): (String, String) = {
        val key = line.split("\"message\":\"").lastOption
          .flatMap(_.split(" ").headOption)
          .getOrElse("")
          .trim
        val account = line.split("account: ").lastOption
          .flatMap(_.split(", args:").headOption)
          .getOrElse("")
        (key, account)
      }

      /**
       * Job entry point.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
        val sc = new SparkContext(conf)
        try {
          val log = sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/*/*/*.log")
          val rowRDD = log.map(parseLine)
          // Keep 10-character keys (presumably yyyy-MM-dd dates - confirm against
          // the log format) that are not the "TypeError:" marker, paired with
          // 8-character accounts.
          val k = rowRDD.filter { case (key, account) =>
            key.length == 10 && !key.contains("TypeError:") && account.length == 8
          }
          // repartition(1) so the result is written as a single part file.
          k.repartition(1).saveAsTextFile("file:////data/kv")
        } finally {
          // Always release the context, even when the job fails.
          sc.stop()
        }
      }
    }

    2、关联MySQL

     #  spark-shell --driver-class-path /usr/local/spark/mysql/mysql.jar
    // spark-shell session: joins the (date, No) pairs parsed from the falcon
    // logs with project/job/role tables read from MySQL over JDBC.
    // `sc` and `sqlContext` are the values provided by spark-shell.

    // (key, account) pairs: key is the first space-delimited token after
    // `"message":"`, account is the text between `account: ` and `, args:`.
    val log = sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/*/*/*.log")
    val rowRDD = log.map(line => (line.split("\"message\":\"").last.split(" ").head.trim(), line.split("account: ").last.split(", args:").head))
    // Keep 10-character keys that are not the "TypeError:" marker, with 7-character accounts.
    val k = rowRDD.filter({ case (k, v) => k.length == 10 && !k.contains("TypeError:") }).filter({ case (k, v) => v.length == 7 })
    // registerTempTable returns Unit, so its result is not bound to a val.
    k.toDF("date", "No").registerTempTable("kv")

    // NOTE(review): credentials are embedded in the connection URL; they should
    // be supplied from outside the source (and rotated, since they are published).
    val jdbcUrl = "jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2"
    val role = sqlContext.jdbc(jdbcUrl, "role")
    val job = sqlContext.jdbc(jdbcUrl, "job")
    val staff_and_job = sqlContext.jdbc(jdbcUrl, "staff_and_job")
    val project = sqlContext.jdbc(jdbcUrl, "project")

    // Expose the JDBC-backed DataFrames to SQL under their table names.
    role.registerTempTable("role")
    job.registerTempTable("job")
    staff_and_job.registerTempTable("staff_and_job")
    project.registerTempTable("project")

    // Staff ids holding the '人事专员' role on projects with is_spread = '1'.
    val q = sqlContext.sql("SELECT project.`name`,project.`code`,staff_and_job .`staff_id` FROM project LEFT JOIN job ON project.`code`=job.`project_code` LEFT JOIN role ON job.`role_code`=role.`code` LEFT JOIN staff_and_job ON job.`id`=staff_and_job .`job_id` WHERE project.`is_spread`='1' AND role.`name`='人事专员' AND staff_and_job .`staff_id` IS NOT NULL")
    q.toDF("name", "code", "No").registerTempTable("p")

    // Distinct log accounts per project name.
    val ed = sqlContext.sql("select p.name,count(distinct kv.No) from p join kv on p.No=kv.No group By p.name")

    3、项目关联活跃用户数

    // spark-shell session: counts distinct log accounts for one project by
    // joining the parsed log pairs with the user/house/project MySQL tables.
    // `sc` and `sqlContext` are the values provided by spark-shell.

    // (key, account) pairs: key is the first space-delimited token after
    // `"message":"`, account is the text between `account: ` and `, args:`.
    val log = sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/*/*/*.log")
    val rowRDD = log.map(line => (line.split("\"message\":\"").last.split(" ").head.trim(), line.split("account: ").last.split(", args:").head))

    // Keep 10-character keys that are not the "TypeError:" marker, with 8-character accounts.
    val k = rowRDD.filter({ case (k, v) => k.length == 10 && !k.contains("TypeError:") }).filter({ case (k, v) => v.length == 8 })

    // registerTempTable returns Unit, so its result is not bound to a val.
    k.toDF("date", "No").registerTempTable("kv")

    // NOTE(review): credentials are embedded in the connection URL; they should
    // be supplied from outside the source (and rotated, since they are published).
    val jdbcUrl = "jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2"
    val user = sqlContext.jdbc(jdbcUrl, "user")
    val house = sqlContext.jdbc(jdbcUrl, "house")
    val project = sqlContext.jdbc(jdbcUrl, "project")

    // Expose the JDBC-backed DataFrames to SQL under their table names.
    user.registerTempTable("user")
    house.registerTempTable("house")
    project.registerTempTable("project")

    // Per-day distinct accounts for the project. The original bound this to
    // `q1` as well and then immediately redefined `q1`, so this result was
    // never used; it gets its own name here.
    val q1ByDate = sqlContext.sql("select project.`name`,kv.date,count(distinct kv.No) from user join kv on kv.No=user.id left join house on user.`main_house_code`=house.`code` left join project on house.`project_code`=project.`code` where kv.date>='2016-05-01' and kv.date<='2016-05-16' and project.name='苏州金色里程' group by project.name,kv.date")

    // NOTE(review): this query still groups by kv.date although it no longer
    // selects it, so it yields one row per day rather than one total per
    // project - confirm whether `group by project.name` was intended.
    val q1 = sqlContext.sql("select project.`name`,count(distinct kv.No) from user  join kv on kv.No=user.id left join house on user.`main_house_code`=house.`code` left join project on house.`project_code`=project.`code` where kv.date>='2016-05-01'  and kv.date<='2016-05-16'  and project.name='苏州金色里程' group by project.name,kv.date")

    // repartition(1) so the result is written as a single part file.
    q1.repartition(1).rdd.saveAsTextFile("file:////data/kvv")
     

    4、活动页面浏览


    spark-shell --driver-class-path /usr/local/spark/mysql/mysql.jar
    // spark-shell session fragment: builds the `kv` temp table from log files.
    // Reads the files matching /falcon/2016/05/16/*.log on HDFS.
    val log=sc.textFile("hdfs://10.0.58.21:9000/falcon/2016/05/16/*.log")

    // Redefines `log` (allowed in the REPL); from here on `log` refers to the
    // files under /user/yejin and the RDD defined above is no longer reachable.
    val log=sc.textFile("hdfs://10.0.58.21:9000/user/yejin/*.log")

    // NOTE(review): `f` is not defined anywhere in this snippet, so this line
    // does not compile as written. Presumably it is the (key, account) pair RDD
    // obtained by mapping `log` the way `rowRDD` is built in the other
    // sections - confirm and add the missing step.
    // Keeps 10-character keys that are not the "TypeError:" marker, with
    // 8-character values.
    val k=f.filter({case(k,v) =>k.length==10 && !k.contains("TypeError:")}).filter({case(k,v)=>v.length==8})
    
    // Registers the pairs as temp table `kv` with columns `date` and `No`.
    // registerTempTable returns Unit, so `s` holds no useful value.
    val s=k.toDF("date","No").registerTempTable("kv")

    5、员工关联岗位

    // spark-shell session: counts distinct log accounts per role name for one
    // day by joining the parsed log pairs with staff/job/role MySQL tables.
    // `sc` and `sqlContext` are the values provided by spark-shell.

    val log = sc.textFile("hdfs://10.0.58.21:9000/user/yejin/*.log")

    // (key, account) pairs: key is the first comma-delimited token after
    // `"message":"`, account is the text between `account: ` and `, args:`.
    val rowRDD = log.map(line => (line.split("\"message\":\"").last.split(",").head.trim(), line.split("account: ").last.split(", args:").head))

    // Keep 10-character keys that are not the "TypeError:" marker, with 7-character accounts.
    val k = rowRDD.filter({ case (k, v) => k.length == 10 && !k.contains("TypeError:") }).filter({ case (k, v) => v.length == 7 })
    // registerTempTable returns Unit, so its result is not bound to a val.
    k.toDF("date", "No").registerTempTable("kv")

    // NOTE(review): credentials are embedded in the connection URL; they should
    // be supplied from outside the source (and rotated, since they are published).
    val jdbcUrl = "jdbc:mysql://rdssw603u1t68figaia7.mysql.rds.aliyuncs.com:3306/falcon?user=wy_app&password=V0tkEIve2"
    val staff_and_job = sqlContext.jdbc(jdbcUrl, "staff_and_job")
    val job = sqlContext.jdbc(jdbcUrl, "job")
    val role = sqlContext.jdbc(jdbcUrl, "role")

    // Expose the JDBC-backed DataFrames to SQL under their table names.
    staff_and_job.registerTempTable("staff_and_job")
    job.registerTempTable("job")
    role.registerTempTable("role")

    // Map every staff id to the name of the role attached to its job.
    val ed = sqlContext.sql("SELECT staff_and_job.`staff_id`,role.`name` FROM staff_and_job LEFT JOIN job ON staff_and_job.`job_id`=job.`id` LEFT JOIN role ON job.`role_code`=role.`code`")
    ed.toDF("No", "Name").registerTempTable("T")

    // Distinct accounts per role name on 2016-05-09.
    val r = sqlContext.sql("select T.Name,count(distinct kv.No) from T join kv on kv.No=T.No where kv.date='2016-05-09' group by T.Name")

    // NOTE(review): the output path is empty in the original, which cannot be
    // a valid destination - fill in the target directory before running.
    r.repartition(1).rdd.saveAsTextFile("")

  • 相关阅读:
    2018 Android面试经历总结(京东、高德、爱奇艺、美团、摩拜等) csdn_aiyang的博客 CSDN博客
    @QQ音乐Android端120万行代码,编译耗时是怎样优化的_chuhe1989的博客CSDN博客
    @Android deeplink和AppLink原理 mingfeng002 博客园
    @Android冷启动优化解析_chuhe1989的博客CSDN博客
    (3条消息) Android 面试必备 http 与 https 协议_gdutxiaoxu的博客(微信公众号 stormjun94)CSDN博客_android 面试http
    @Android面试题(32)android编译过程和安装流程_pgg_cold的博客CSDN博客
    @Android面试题集2019版(包含答案整理)_A富得流油的咸鸭蛋的博客CSDN博客
    一道有意思的面试题目
    22
    @说说卡顿问题 简书
  • 原文地址:https://www.cnblogs.com/canyangfeixue/p/5488260.html
Copyright © 2020-2023  润新知