• scala06


    scala06

    option的匹配

    // Salary lookup table used to demonstrate pattern matching on an Option.
    val map = Map(("zhangsan",2000),("lisi",2500),("wangwu",3000))
    // `Map.get` returns Option[Int]; keeping that static type (instead of widening
    // it to Any) lets the compiler verify that the match below is exhaustive.
    val option: Option[Int] = map.get("zhangsan")
    option match {
      case Some(v) => println(v)         // key present: print the stored value
      case None    => println("nothing") // key absent
    }

    作业题:

    /**
      * Homework: per-site, per-day page views (PV) and unique visitors (UV).
      *
      * Input is the file `log.txt` in the working directory. A usable line has
      * exactly three comma-separated fields: `site,user,time`, for example
      * `site1,user1,2018-03-02 02:25:25`.
      */
    object Test2 {
      /**
        * Reads `log.txt`, prints one `((site, day), pv)` line per group, then one
        * `((site, day), uv)` line per group.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        val source = Source.fromFile("log.txt")
        try {
          // (site, user, day). `split(",")` drops trailing empty fields, so lines
          // such as "site3,," or "site4,user5," produce fewer than three fields
          // and are skipped by the exact-arity pattern below.
          val visits: List[(String, String, String)] = source.getLines()
            .map(_.split(","))
            // The day is everything in the time field before the first space.
            .collect { case Array(site, user, time) => (site, user, time.takeWhile(_ != ' ')) }
            .toList

          val bySiteAndDay: Map[(String, String), List[(String, String, String)]] =
            visits.groupBy { case (site, _, day) => (site, day) }

          // A strict `map` is used instead of `mapValues`, which returns a lazy view
          // that re-evaluates its function on every access.
          val dayPV: Map[(String, String), Int] =
            bySiteAndDay.map { case (key, hits) => (key, hits.length) }
          dayPV.foreach(println)

          val dayUV: Map[(String, String), Int] =
            bySiteAndDay.map { case (key, hits) => (key, hits.map(_._2).distinct.length) }
          dayUV.foreach(println)
        } finally {
          // Always release the file handle, even if parsing or printing fails.
          source.close()
        }
      }
    }
    //site4,user5,
    //site3,,
    //site1,user1,2018-03-02 02:25:25
    /**
      * Homework: per-site, per-day, per-hour page views (PV) and unique visitors (UV).
      *
      * Input is the file `log.txt` in the working directory. A usable line has
      * exactly three comma-separated fields: `site,user,time`, where `time`
      * looks like `2018-03-02 02:25:25`.
      */
    object Test3 {
      /**
        * Reads `log.txt`, prints one `((site, day, hour), pv)` line per group, a
        * separator line, then one `((site, day, hour), uv)` line per group.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        val source = Source.fromFile("log.txt")
        try {
          // (site, user, day, hour). `split(",")` drops trailing empty fields, so
          // lines such as "site3,," or "site4,user5," are skipped by the first
          // pattern. The second pattern also skips lines whose time field has no
          // time-of-day part, instead of failing with an index-out-of-bounds error.
          val visits: List[(String, String, String, String)] = source.getLines()
            .map(_.split(","))
            .collect { case Array(site, user, time) => (site, user, time.split(" ")) }
            .collect { case (site, user, Array(day, clock, _*)) =>
              // The hour is everything in the clock part before the first ':'.
              (site, user, day, clock.takeWhile(_ != ':'))
            }
            .toList

          val bySiteDayHour: Map[(String, String, String), List[(String, String, String, String)]] =
            visits.groupBy { case (site, _, day, hour) => (site, day, hour) }

          // A strict `map` is used instead of `mapValues`, which returns a lazy view
          // that re-evaluates its function on every access.
          val hourPV: Map[(String, String, String), Int] =
            bySiteDayHour.map { case (key, hits) => (key, hits.length) }
          hourPV.foreach(println)

          println("**********************")

          val hourUV: Map[(String, String, String), Int] =
            bySiteDayHour.map { case (key, hits) => (key, hits.map(_._2).distinct.length) }
          hourUV.foreach(println)
        } finally {
          // Always release the file handle, even if parsing or printing fails.
          source.close()
        }
      }
    }

    /**
      * Homework: for every user, find the movie genre with the highest average rating.
      *
      * Inputs, both read from the working directory:
      *  - `ratings.txt`: comma-separated lines whose first three fields are
      *    user id, movie id and score;
      *  - `movies.txt`: comma-separated lines whose first field is the movie id and
      *    whose last field is a `|`-separated genre list (e.g. `Action|Thriller|Crime`).
      */
    object MovieTest {
      /** Reads every line of `path` into memory and closes the file afterwards. */
      private def readLines(path: String): List[String] = {
        val source = Source.fromFile(path)
        try source.getLines().toList finally source.close()
      }

      /**
        * Prints one `(uid, (genre, averageScore))` line per user, where `genre` is
        * the user's best-rated genre by average score.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        // (uid, mid, score)
        val ratings: List[(String, String, String)] = readLines("ratings.txt").map { line =>
          val fields = line.split(",")
          (fields(0), fields(1), fields(2))
        }

        // mid -> genre list; the genres are taken from the LAST field of the line.
        val genresByMovie: Map[String, String] = readLines("movies.txt").map { line =>
          val fields = line.split(",")
          (fields(0), fields(fields.length - 1))
        }.toMap

        // One (uid, genre, score) row per genre of each rated movie. Ratings that
        // reference a movie id missing from movies.txt are skipped rather than
        // aborting the whole run with NoSuchElementException.
        val userGenreScores: List[(String, String, Double)] = ratings.flatMap {
          case (uid, mid, score) =>
            val genres: List[String] =
              // '|' is a regex metacharacter, so it must be escaped for String.split.
              genresByMovie.get(mid).toList.flatMap(_.split("\\|").toList)
            val numericScore = score.toDouble
            genres.map(genre => (uid, genre, numericScore))
        }

        // (uid, genre) -> average score
        val utypeAvg: Map[(String, String), Double] =
          userGenreScores
            .groupBy { case (uid, genre, _) => (uid, genre) }
            .map { case (key, rows) => (key, rows.map(_._3).sum / rows.length) }

        // uid -> (best genre, its average). `maxBy` finds the maximum in one pass
        // instead of sorting every user's genre list.
        val result: Map[String, (String, Double)] =
          utypeAvg.toList
            .groupBy { case ((uid, _), _) => uid }
            .map { case (uid, rows) =>
              val ((_, genre), avg) = rows.maxBy(_._2)
              (uid, (genre, avg))
            }

        result.foreach(println)
      }
    }

    偏函数

    偏函数是专门用于模式匹配的函数

    scala> var arr = Array(1,2,3,4,5,6)

    arr: Array[Int] = Array(1, 2, 3, 4, 5, 6)

    scala> def pf:PartialFunction[Int,Int]={

         | case x=>x*10

         | }

    pf: PartialFunction[Int,Int]

    scala> arr.map(pf)

    res1: Array[Int] = Array(10, 20, 30, 40, 50, 60)

    scala> var arr = Array(("zhangsan",2000),("lisi",2500))

    arr: Array[(String, Int)] = Array((zhangsan,2000), (lisi,2500))

    scala> def pf:PartialFunction[(String,Int),(String,Int)]={

         | case (x,y)=>(x,y+1000)

         | }

    pf: PartialFunction[(String, Int),(String, Int)]

    scala> arr.map(pf)

    res2: Array[(String, Int)] = Array((zhangsan,3000), (lisi,3500))

    定义偏函数

    def methodName:PartialFunction[inType,outType]={

    case  =>

    }

    AKKA

    akka是一个通信机制,相当于hadoop中的RPC协议

    akka就是spark1.6以前的通信协议,1.6以后使用的通信协议是netty

    akka相当于多线程 ---> 多线程 --->单线程处理能力差不能解决并发的问题---->多线程就是多个线程一起工作 ---> 线程数据混乱(多线程之间不会进行通信)

    北网 入学

    h5

    python mysql

    hadoop

  • 相关阅读:
    uboot的启动过程-FDT
    pat练习
    也不知道 为什么没通过 -------------存疑 ---延后解决
    刚认识--博客园
    MapReduce报错Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio
    Hadoop 学习笔记(十)HDFS 之 集群安全模式 & DateaNode
    Hadoop 学习笔记(九)HDFS 之 NameNode & SecondaryNameNode
    sqlldr 报错:Field in data file exceeds maximum length 解决方法
    Hadoop 学习笔记(八)HDFS 数据的读写
    Hadoop 学习笔记(七)HDFS 客户端操作(下)
  • 原文地址:https://www.cnblogs.com/JBLi/p/11527151.html
Copyright © 2020-2023  润新知