• RDD实例


    实例一:

    teacher.log

    http://bigdata.baidu.cn/zhangsan
    http://bigdata.baidu.cn/zhangsan
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/wangwu
    http://bigdata.baidu.cn/wangwu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/wangwu
    http://bigdata.baidu.cn/wangwu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/lisi
    http://bigdata.baidu.cn/wangwu
    http://bigdata.baidu.cn/wangwu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/xiaoxu
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://javaee.baidu.cn/laoyang
    http://php.baidu.cn/laoli
    http://php.baidu.cn/laoliu
    http://php.baidu.cn/laoli
    http://php.baidu.cn/laoli

    需求:统计全局 TopN(所有学科中访问量最高的老师)和组内 TopN(每个学科内访问量最高的老师)

    代码:

    package dayo1
    
    import java.net.URL
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    
    /**
      * Ranks teachers by the number of access-log lines that point at them.
      *
      * Each input line is a URL such as `http://bigdata.baidu.cn/lisi`: the first
      * label of the host is used as the subject and the last path segment as the
      * teacher. The job prints the three most visited (teacher, subject) pairs
      * overall, then the two most visited teachers within each subject.
      */
    object teacher2 {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setAppName(this.getClass.getSimpleName)
          .setMaster("local[1]")

        val sc = new SparkContext(conf)

        try {
          // The backslash must be escaped: a bare "\t" inside a Scala string
          // literal is a TAB character, which silently produced a wrong path.
          val lines = sc.textFile("E:\\teacher.log")

          val overAll: RDD[((String, String), Int)] = lines
            .filter(_.trim.nonEmpty) // a blank line would make `new URL` throw
            .map { line =>
              val teacher: String = line.split("/").last
              val host = new URL(line).getHost
              val subject = host.substring(0, host.indexOf("."))
              ((teacher, subject), 1)
            }

          // Both rankings start from the same per-(teacher, subject) totals, so
          // aggregate once and cache instead of running reduceByKey twice.
          val visitCounts = overAll.reduceByKey(_ + _).cache()

          // Top three (teacher, subject) pairs across all subjects.
          visitCounts
            .sortBy { case (_, visits) => -visits }
            .take(3)
            .foreach(println)

          // Top two teachers within each subject.
          visitCounts
            .groupBy { case ((_, subject), _) => subject }
            .mapValues(_.toList.sortBy { case (_, visits) => -visits }.take(2))
            .foreach(println)
        } finally {
          // Release the context even when the job fails.
          sc.stop()
        }
      }
    }

    实例二:

    需求:合并 file1 和 file2 两个文件的内容,去除重复行并排序输出

    file1:
    2012-3-1 a
    2012-3-2 b
    2012-3-3 c
    2012-3-4 d
    2012-3-5 a
    2012-3-6 b
    2012-3-7 c
    2012-3-3 c
    
    
    file2:
    2012-3-1 b
    2012-3-2 a
    2012-3-3 b
    2012-3-4 d
    2012-3-5 a
    2012-3-6 c
    2012-3-7 d
    2012-3-3 c

    代码:

    package dayo1
    
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * Merges the lines of two text files, drops duplicate lines and prints the
      * remaining lines in ascending (lexicographic) order.
      */
    object distinct {
      def main(args: Array[String]): Unit = {
        val cof = new SparkConf()
          .setAppName(this.getClass.getSimpleName)
          .setMaster("local[1]")

        val sc = new SparkContext(cof)

        try {
          // Backslashes are escaped: a bare "\f" inside a Scala string literal is
          // a form-feed character, which silently produced wrong paths.
          val file1 = sc.textFile("E:\\file1.txt")
          val file2 = sc.textFile("E:\\file2.txt")

          val list = file1.union(file2).distinct().sortBy(identity)

          // Collect to the driver before printing so the output follows the
          // sorted order; RDD.foreach gives no ordering guarantee across
          // partitions.
          list.collect().foreach(println)
        } finally {
          // Release the context even when the job fails.
          sc.stop()
        }
      }
    }

     实例三:

    temperature.txt

    0067011990999991950051507004888888889999999N9+00001+9999999999999999999999
    0067011990999991950051512004888888889999999N9+00221+9999999999999999999999
    0067011990999991950051518004888888889999999N9-00111+9999999999999999999999
    0067011990999991949032412004888888889999999N9+01111+9999999999999999999999
    0067011990999991950032418004888888880500001N9+00001+9999999999999999999999
    0067011990999991950051507004888888880500001N9+00781+9999999999999999999999

    需求:分析每年的最高温度

    代码:

    package dayo1
    
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * Finds the highest temperature recorded in each year.
      *
      * Sample records:
      *
      * 0067011990999991950051507004888888889999999N9+00001+9999999999999999999999
      * 0067011990999991950051512004888888889999999N9+00221+9999999999999999999999
      * 0067011990999991950051518004888888889999999N9-00111+9999999999999999999999
      * 0067011990999991949032412004888888889999999N9+01111+9999999999999999999999
      * 0067011990999991950032418004888888880500001N9+00001+9999999999999999999999
      * 0067011990999991950051507004888888880500001N9+00781+9999999999999999999999
      *
      * 12345678911234567892123456789312345678941234567895123456789612345678971234
      * (column ruler, 1-based)
      *
      * Fixed-width layout as read by this job (0-based offsets):
      *
      *  - offsets 15 to 18: the year
      *  - offsets 45 to 49: the temperature, a sign ('+' above zero, '-' below
      *    zero) followed by four digits; a value of 9999 marks an invalid reading
      *  - offset 50: a quality digit; only 0, 1, 4, 5 and 9 are accepted
      */
    object temperature {
      private val QualityIndex = 50
      private val InvalidReading = 9999
      private val AcceptedQuality = Set('0', '1', '4', '5', '9')

      /** Year field of a record (offsets 15 to 18). */
      private def yearOf(record: String): String = record.substring(15, 19)

      /**
        * Signed temperature field of a record (offsets 45 to 49).
        *
        * `toInt` accepts a leading '+' or '-', so no sign-specific branch is needed.
        */
      private def temperatureOf(record: String): Int = record.substring(45, 50).toInt

      def main(args: Array[String]): Unit = {
        val cof = new SparkConf()
          .setAppName(this.getClass.getSimpleName)
          .setMaster("local[*]")
        val sc = new SparkContext(cof)

        try {
          // The backslash must be escaped: a bare "\t" inside a Scala string
          // literal is a TAB character, which silently produced a wrong path.
          val lines = sc.textFile("E:\\temperature.txt")

          val yearAndTemp = lines
            .filter { record =>
              // Skip lines too short to hold the quality digit (e.g. blank lines)
              // instead of failing with StringIndexOutOfBoundsException.
              record.length > QualityIndex &&
                temperatureOf(record) != InvalidReading &&
                AcceptedQuality(record.charAt(QualityIndex))
            }
            .map(record => (yearOf(record), temperatureOf(record)))

          // Keep the larger of two readings for the same year.
          val res = yearAndTemp.reduceByKey(math.max(_, _))

          res.foreach { case (year, temp) => println(s"year:$year  temp:$temp") }
        } finally {
          // Release the context even when the job fails.
          sc.stop()
        }
      }
    }
  • 相关阅读:
    实现微信朋友圈点击评论按钮时cell上移
    UITableView的横向使用
    用Xcode6的Leaks检测内存泄漏
    IOS8设备连接XCODE6真机调试报错"Could not inspect the application package"
    Implicit declaration of function 'ether_ntoa' is invalid in C99
    .xcodeprok cannot be opened because the project file cannot be parsed
    根据图片的链接获取图片的宽高
    关于UIWebView设置高度自适应的问题
    reason: 'data parameter is nil'
    CSS图标文字对齐和表单输入框文字对齐兼容
  • 原文地址:https://www.cnblogs.com/wangshuang123/p/11058723.html
Copyright © 2020-2023  润新知