• spark APP升级


    2017-08-14,涛哥,共享女友,360应用,v1.0
    2017-08-14,涛哥,共享女友,360应用,v1.2
    2017-08-14,涛哥,共享女友,360应用,v1.3
    2017-09-14,涛哥,同城交友,360应用,v1.0
    2017-09-14,涛哥,同城交友,360应用,v1.0
    2017-09-14,涛哥,同城交友,360应用,v1.3
    2017-09-14,涛哥,同城交友,360应用,v1.5
    2017-08-15,涛哥,约吗,360应用,v1.3


    需求1:求出最大升级情况,即同一日期、用户、应用、应用市场下的最低版本和最高版本
    期望的1条结果数据: 2017-08-14,涛哥,共享女友,360应用,v1.0,v1.3
    package com.bw.homework

    import scala.io.Source

    /**
      * Reads a comma-separated upgrade log and prints, for every group of lines
      * sharing the same first four fields, the lowest and highest version seen.
      *
      * Judging by the sample data, a line looks like
      * `2017-08-14,user,app,market,v1.0`: the first four fields form the grouping
      * key and the fifth is the version. Lines with fewer than five fields (for
      * example blank lines) are skipped instead of crashing the run.
      */
    object TaoGe {

      /** Grouping key: the first four comma-separated fields of a line. */
      private type Key = (String, String, String, String)

      /** Input file used when no path is passed on the command line. */
      private val DefaultPath = "tao.txt"

      /**
        * Orders version strings by their numeric components so that `v1.10`
        * sorts after `v1.9`; plain string comparison is only the tie-breaker.
        */
      private val VersionOrdering: Ordering[String] = new Ordering[String] {
        def compare(left: String, right: String): Int =
          numericParts(left)
            .zipAll(numericParts(right), BigInt(0), BigInt(0))
            .collectFirst { case (a, b) if a != b => a.compare(b) }
            .getOrElse(left.compareTo(right))
      }

      /**
        * Entry point.
        *
        * @param args optional; `args(0)` overrides the default input path `tao.txt`
        */
      def main(args: Array[String]): Unit = {
        val path = args.headOption.getOrElse(DefaultPath)
        // A strict `map` is used instead of `mapValues`, which is a lazy view
        // (re-evaluated on every access) and no longer returns a `Map` in 2.13.
        val grouped: Map[Key, List[String]] =
          readRecords(path).groupBy(_._1).map { case (key, rows) => key -> rows.map(_._2) }

        // Uncomment to also print every consecutive upgrade step:
        // upgradeSteps(grouped).foreach(println)
        println("-----------*****************-------------")
        grouped.foreach { case (key, versions) =>
          println((key, (versions.min(VersionOrdering), versions.max(VersionOrdering))))
        }
      }

      /** Loads `(key, version)` pairs from `path`, always closing the file afterwards. */
      private def readRecords(path: String): List[(Key, String)] = {
        val source = Source.fromFile(path)
        try {
          source
            .getLines()
            .map(_.split(","))
            .collect { case Array(f0, f1, f2, f3, version, _*) => ((f0, f1, f2, f3), version) }
            .toList // materialise before the source is closed
        } finally source.close()
      }

      /**
        * Pairs each version with the one that follows it in file order.
        * A group with a single version yields no step.
        */
      private def upgradeSteps(grouped: Map[Key, List[String]]): List[(Key, (String, String))] =
        grouped.toList.flatMap { case (key, versions) =>
          versions.zip(versions.drop(1)).map(step => (key, step))
        }

      /** Extracts the runs of digits of a version string, e.g. `v1.10` gives `List(1, 10)`. */
      private def numericParts(version: String): List[BigInt] =
        "\\d+".r.findAllIn(version).map(BigInt(_)).toList
    }
    -----------*****************-------------
    ((2017-08-14,涛哥,共享女友,360应用),(v1.0,v1.3))
    ((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.5))
    ((2017-08-15,涛哥,约吗,360应用),(v1.3,v1.3))




    需求2:
    2017-08-14,涛哥,共享女友,360应用,v1.0,v1.2
    2017-08-14,涛哥,共享女友,360应用,v1.2,v1.3
    求出每次升级情况
    每次升级情况的输出(对应代码中 res7 的结果,需取消被注释掉的那行打印):
    ((2017-08-14,涛哥,共享女友,360应用),(v1.0,v1.2))
    ((2017-08-14,涛哥,共享女友,360应用),(v1.2,v1.3))
    ((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.0))
    ((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.3))
    ((2017-09-14,涛哥,同城交友,360应用),(v1.3,v1.5))



    (二)另一个案例
    package com.bw.spark

    import scala.io.{BufferedSource, Source}


    /**
      * Reads a tab-separated upgrade log and prints two reports per
      * (field 0, field 1) group: every consecutive upgrade step, then the lowest
      * and highest version.
      *
      * Goal as stated by the original author: for each user and each app, produce
      * every upgrade step, e.g.
      * {{{
      * Zhang San  Tencent Video  v1.3  v1.4
      * Zhang San  Tencent Video  v1.4  v1.6
      * Zhang San  Tencent Video  v1.6  v1.9
      * }}}
      *
      * NOTE(review): the version is read from the fourth tab-separated field and
      * the third field is ignored; the input layout is not shown here, so confirm
      * it against the real data file.
      */
    object jishi {

      /** Grouping key: the first two tab-separated fields of a line. */
      private type Key = (String, String)

      /**
        * Input file used when no path is passed on the command line.
        * Backslashes must be escaped: `\i` and `\s` are not valid escapes in a
        * Scala string literal, so the unescaped path does not compile.
        */
      private val DefaultPath = "D:\\ideaworkspace\\spark01\\jishi"

      /**
        * Orders version strings by their numeric components so that `v1.10`
        * sorts after `v1.9`; plain string comparison is only the tie-breaker.
        */
      private val VersionOrdering: Ordering[String] = new Ordering[String] {
        def compare(left: String, right: String): Int =
          numericParts(left)
            .zipAll(numericParts(right), BigInt(0), BigInt(0))
            .collectFirst { case (a, b) if a != b => a.compare(b) }
            .getOrElse(left.compareTo(right))
      }

      /**
        * Entry point.
        *
        * @param args optional; `args(0)` overrides the default input path
        */
      def main(args: Array[String]): Unit = {
        val path = args.headOption.getOrElse(DefaultPath)
        // A strict `map` is used instead of `mapValues`, which is a lazy view
        // (re-evaluated on every access) and no longer returns a `Map` in 2.13.
        val grouped: Map[Key, List[String]] =
          readRecords(path).groupBy(_._1).map { case (key, rows) => key -> rows.map(_._2) }

        // Report 1: each version paired with the next one, in file order.
        // A group with a single version contributes no step.
        grouped.toList
          .flatMap { case (key, versions) =>
            versions.zip(versions.drop(1)).map(step => (key, step))
          }
          .foreach(println)

        println("-----------*****************-------------")

        // Report 2: lowest and highest version of every group.
        grouped.foreach { case (key, versions) =>
          println((key, (versions.min(VersionOrdering), versions.max(VersionOrdering))))
        }
      }

      /**
        * Loads `(key, version)` pairs from `path`, always closing the file
        * afterwards. Lines with fewer than four fields are skipped.
        */
      private def readRecords(path: String): List[(Key, String)] = {
        val source = Source.fromFile(path)
        try {
          source
            .getLines()
            .map(_.split("\t"))
            .collect { case fields if fields.length >= 4 => ((fields(0), fields(1)), fields(3)) }
            .toList // materialise before the source is closed
        } finally source.close()
      }

      /** Extracts the runs of digits of a version string, e.g. `v1.10` gives `List(1, 10)`. */
      private def numericParts(version: String): List[BigInt] =
        "\\d+".r.findAllIn(version).map(BigInt(_)).toList
    }


    (三)另一种写法:版本去重并排序后再求每次升级情况
    package com.bw.homework

    import scala.io.Source
    //2017-09-14,涛哥,同城交友,360应用,v1.0
    /**
      * Reads a comma-separated upgrade log and prints two reports for every group
      * of lines sharing the same first four fields:
      *
      *  1. the lowest and highest version of the group;
      *  2. after a separator line, every upgrade step between consecutive
      *     distinct versions, sorted by version. A group with only one distinct
      *     version is reported as a step from that version to itself.
      *
      * Judging by the sample data, a line looks like
      * `2017-09-14,user,app,market,v1.0`. Lines with fewer than five fields (for
      * example blank lines) are skipped instead of crashing the run.
      */
    object TaoGe2 {

      /** Grouping key: the first four comma-separated fields of a line. */
      private type Key = (String, String, String, String)

      /** Input file used when no path is passed on the command line. */
      private val DefaultPath = "tao.txt"

      /**
        * Orders version strings by their numeric components so that `v1.10`
        * sorts after `v1.9`; plain string comparison is only the tie-breaker.
        */
      private val VersionOrdering: Ordering[String] = new Ordering[String] {
        def compare(left: String, right: String): Int =
          numericParts(left)
            .zipAll(numericParts(right), BigInt(0), BigInt(0))
            .collectFirst { case (a, b) if a != b => a.compare(b) }
            .getOrElse(left.compareTo(right))
      }

      /**
        * Entry point.
        *
        * @param args optional; `args(0)` overrides the default input path `tao.txt`
        */
      def main(args: Array[String]): Unit = {
        val path = args.headOption.getOrElse(DefaultPath)
        // A strict `map` is used instead of `mapValues`, which is a lazy view
        // (re-evaluated on every access) and no longer returns a `Map` in 2.13.
        val grouped: Map[Key, List[String]] =
          readRecords(path).groupBy(_._1).map { case (key, rows) => key -> rows.map(_._2) }

        grouped.foreach { case (key, versions) =>
          println((key, (versions.min(VersionOrdering), versions.max(VersionOrdering))))
        }

        println("==================================")

        grouped.toList
          .flatMap { case (key, versions) => upgradeSteps(versions).map(step => (key, step)) }
          .foreach(println)
      }

      /** Loads `(key, version)` pairs from `path`, always closing the file afterwards. */
      private def readRecords(path: String): List[(Key, String)] = {
        val source = Source.fromFile(path)
        try {
          source
            .getLines()
            .map(_.split(",")) // split on commas
            .collect { case Array(f0, f1, f2, f3, version, _*) => ((f0, f1, f2, f3), version) }
            .toList // materialise before the source is closed
        } finally source.close()
      }

      /**
        * Builds the upgrade steps of one group.
        *
        * Versions are de-duplicated and sorted, then each one is paired with its
        * successor: `v1.0 v1.1 v1.2` becomes `(v1.0,v1.1), (v1.1,v1.2)`.
        * A single distinct version `v` becomes `(v, v)`.
        */
      private def upgradeSteps(versions: List[String]): List[(String, String)] = {
        val ordered = versions.distinct.sorted(VersionOrdering)
        ordered match {
          case only :: Nil => List((only, only))
          case _           => ordered.zip(ordered.drop(1))
        }
      }

      /** Extracts the runs of digits of a version string, e.g. `v1.10` gives `List(1, 10)`. */
      private def numericParts(version: String): List[BigInt] =
        "\\d+".r.findAllIn(version).map(BigInt(_)).toList
    }
  • 相关阅读:
    Java 方法重载 (Overload)
    Java 向数组中添加一个元素
    Java 三目运算符
    代理池的维护
    代理设置
    验证码识别
    使用Selenium爬取淘宝商品
    Splash API 调用
    Android ListView中Item点击事件失效解决方案
    mapreduce框架详解
  • 原文地址:https://www.cnblogs.com/wxk161640207382/p/11189204.html
Copyright © 2020-2023  润新知