• Spark算上下基线


    /**
     * Computes and prints an upper and lower traffic baseline for every
     * (ip, port, time-of-day window) combination found in the input file.
     *
     * Each input line is expected to look like
     * `ip|port|startTime|endTime|value`, where the two timestamps are
     * `yyyy-MM-dd HH:mm`. Samples are grouped first by `ip_port` and then by
     * `HH:mm_HH:mm` (the date part is discarded), so that the same window on
     * different days lands in the same group. For each group the baseline is
     * `mean +/- 1.96 * sqrt(variance)`, i.e. the two-sided 95% band under a
     * normal-distribution assumption.
     *
     * The whole file is collected to the driver and one small Spark job is run
     * per group, so this is only suitable for modest input sizes.
     *
     * @param sc Spark context used to read the file and to run `colStats`
     */
    def baseLine(sc: SparkContext): Unit = {
        println("--------------------baseLine start--------------------")
        val lines = sc.textFile("/test/baseLineTestData.txt").collect()
        // ip_port -> (startHHmm_endHHmm -> observed values)
        val samples = HashMap[String, HashMap[String, collection.mutable.ArrayBuffer[Double]]]()

        lines.foreach { line =>
          println("--------------------data1.foreach start--------------------")
          val parts = line.split('|')
          val ip = parts(0)
          val port = parts(1)
          val startTime = parts(2)
          val endTime = parts(3)
          val sun = parts(4).toDouble

          println("ip:"+ip)
          println("port:"+port)
          println("startTime:"+startTime)
          println("endTime:"+endTime)
          println("sun:"+sun)

          // Outer key: one entry per ip/port pair.
          val key1 = ip + "_" + port
          println("key1:"+key1)

          // Inner key: time-of-day window only, e.g. "14:02_14:07".
          val key2 = startTime.split(" ")(1) + "_" + endTime.split(" ")(1)
          println("key2:"+key2)

          if (samples.contains(key1)) {
            println("--------------------map is not null--------------------")
          } else {
            // First time this ip/port pair is seen.
            println("--------------------map is null--------------------")
          }

          // Create the missing levels on demand and ALWAYS record the sample.
          // Previously the first sample of a new ip/port was dropped, and a new
          // time window under a known ip/port threw NoSuchElementException.
          val windows = samples.getOrElseUpdate(
            key1, HashMap[String, collection.mutable.ArrayBuffer[Double]]())
          val values = windows.getOrElseUpdate(key2, collection.mutable.ArrayBuffer[Double]())
          values += sun
        }

        println("--------------------get data is end--------------------")

        samples.foreach { case (resultKey1, windows) =>
          println("--------------------Statistics start --------------------")
          println("resultKey1:" + resultKey1)
          windows.foreach { case (resultKey2, values) =>
            println("resultKey2:" + resultKey2)

            // colStats works on vectors; each sample becomes a 1-dimensional row.
            val dataArray = values.map(v => Vectors.dense(v))

            val summary: MultivariateStatisticalSummary = Statistics.colStats(sc.parallelize(dataArray))

            println("--------------------mean:"+summary.mean+" --------------------")
            println("--------------------variance:"+summary.variance+" --------------------")

            val mean = summary.mean.toArray.apply(0)
            val variance = summary.variance.apply(0)
            println("--------------------mean apply 0:"+mean+" --------------------")
            println("--------------------variance apply 0:"+variance+" --------------------")

            // Half-width of the band: 1.96 standard deviations.
            val halfWidth = 1.960 * Math.sqrt(variance)
            val upbase = mean + halfWidth
            val downbase = mean - halfWidth
            println("------------------- " + upbase + " ---------- " + downbase)
            println("ip port:" + resultKey1 + ",time:" + resultKey2 + ",upbase:" + upbase + ",downbase:" + downbase)
          }
        }

        println("--------------------baseLine end --------------------")

    }
    

    需求:计算某一个IP的某个端口在某一个时间段内的流量上下基线(上基线 = 均值 + 1.96 × 标准差,下基线 = 均值 − 1.96 × 标准差)

    数据样例(每行字段以 | 分隔,依次为:IP|端口|开始时间|结束时间|流量):

    192.168.10.110|8080|2015-10-14 14:02|2015-10-14 14:07|3210981
    192.168.10.110|8080|2015-10-13 14:02|2015-10-13 14:07|3210881
    192.168.10.110|8080|2015-10-12 14:02|2015-10-12 14:07|3210781
    192.168.10.110|8080|2015-10-11 14:02|2015-10-11 14:07|3210681
    192.168.10.110|8080|2015-10-10 14:02|2015-10-10 14:07|3210581
    192.168.10.110|8080|2015-10-09 14:02|2015-10-09 14:07|3210481
    192.168.10.110|8080|2015-10-08 14:02|2015-10-08 14:07|3210381
    192.168.10.110|8080|2015-10-07 14:02|2015-10-07 14:07|3210281
    192.168.10.110|8080|2015-10-06 14:02|2015-10-06 14:07|3210181
    192.168.10.110|8080|2015-10-05 14:02|2015-10-05 14:07|3210081
    192.168.10.110|8080|2015-10-04 14:02|2015-10-04 14:07|3219981
    192.168.10.110|8080|2015-10-03 14:02|2015-10-03 14:07|3218981
    192.168.10.110|8080|2015-10-02 14:02|2015-10-02 14:07|3217981
    192.168.10.110|8080|2015-10-01 14:02|2015-10-01 14:07|3216981
    192.168.10.110|8080|2015-09-30 14:02|2015-09-30 14:07|3215981
    192.168.10.110|8080|2015-09-29 14:02|2015-09-29 14:07|3214981
    192.168.10.110|8080|2015-09-28 14:02|2015-09-28 14:07|3213981
    192.168.10.110|8080|2015-09-27 14:02|2015-09-27 14:07|3212981
    192.168.10.110|8080|2015-09-26 14:02|2015-09-26 14:07|3211981
    192.168.10.110|8080|2015-09-25 14:02|2015-09-25 14:07|3220981
    192.168.10.110|8080|2015-09-24 14:02|2015-09-24 14:07|3230981
    192.168.10.110|8080|2015-09-23 14:02|2015-09-23 14:07|3240981
    192.168.10.110|8080|2015-09-22 14:02|2015-09-22 14:07|3250981
    192.168.10.110|8080|2015-09-21 14:02|2015-09-21 14:07|3260981
    192.168.10.110|8080|2015-09-20 14:02|2015-09-20 14:07|3270981
    192.168.10.110|8080|2015-09-19 14:02|2015-09-19 14:07|3280981
    192.168.10.110|8080|2015-09-18 14:02|2015-09-18 14:07|3290981
    192.168.10.110|8080|2015-09-17 14:02|2015-09-17 14:07|3210982
    192.168.10.110|8080|2015-09-16 14:02|2015-09-16 14:07|3210983
    192.168.10.110|8080|2015-09-15 14:02|2015-09-15 14:07|3210984
    192.168.10.110|8080|2015-09-14 14:02|2015-09-14 14:07|3210985
    192.168.10.110|8080|2015-09-13 14:02|2015-09-13 14:07|3210986
    192.168.10.110|8080|2015-09-12 14:02|2015-09-12 14:07|3210987
    192.168.10.110|8080|2015-09-11 14:02|2015-09-11 14:07|3210988
    192.168.10.110|8080|2015-09-10 14:02|2015-09-10 14:07|3110989
    192.168.10.110|8080|2015-09-09 14:02|2015-09-09 14:07|3210981
    192.168.10.110|8080|2015-09-07 14:02|2015-09-07 14:07|3310981
    192.168.10.110|8080|2015-09-06 14:02|2015-09-06 14:07|3410981
    192.168.10.110|8080|2015-09-05 14:02|2015-09-05 14:07|2510981
    192.168.10.110|8081|2015-10-14 14:02|2015-10-14 14:07|2210981
    192.168.10.110|8081|2015-10-13 14:02|2015-10-13 14:07|2210881
    192.168.10.110|8081|2015-10-12 14:02|2015-10-12 14:07|2210781
    192.168.10.110|8081|2015-10-11 14:02|2015-10-11 14:07|2210681
    192.168.10.110|8081|2015-10-10 14:02|2015-10-10 14:07|2210581
    192.168.10.110|8081|2015-10-09 14:02|2015-10-09 14:07|2210481
    192.168.10.110|8081|2015-10-08 14:02|2015-10-08 14:07|2210381
    192.168.10.110|8081|2015-10-07 14:02|2015-10-07 14:07|2210281
    192.168.10.110|8081|2015-10-06 14:02|2015-10-06 14:07|2210181
    192.168.10.110|8081|2015-10-05 14:02|2015-10-05 14:07|2210081
    192.168.10.110|8081|2015-10-04 14:02|2015-10-04 14:07|2219981
    192.168.10.110|8081|2015-10-03 14:02|2015-10-03 14:07|2218981
    192.168.10.110|8081|2015-10-02 14:02|2015-10-02 14:07|2217981
    192.168.10.110|8081|2015-10-01 14:02|2015-10-01 14:07|2216981
    192.168.10.110|8081|2015-09-30 14:02|2015-09-30 14:07|2215981
    192.168.10.110|8081|2015-09-29 14:02|2015-09-29 14:07|2214981
    192.168.10.110|8081|2015-09-28 14:02|2015-09-28 14:07|2213981
    192.168.10.110|8081|2015-09-27 14:02|2015-09-27 14:07|2212981
    192.168.10.110|8081|2015-09-26 14:02|2015-09-26 14:07|2211981
    192.168.10.110|8081|2015-09-25 14:02|2015-09-25 14:07|2220981
    192.168.10.110|8081|2015-09-24 14:02|2015-09-24 14:07|2230981
    192.168.10.110|8081|2015-09-23 14:02|2015-09-23 14:07|2240981
    192.168.10.110|8081|2015-09-22 14:02|2015-09-22 14:07|2250981
    192.168.10.110|8081|2015-09-21 14:02|2015-09-21 14:07|2260981
    192.168.10.110|8081|2015-09-20 14:02|2015-09-20 14:07|2270981
    192.168.10.110|8081|2015-09-19 14:02|2015-09-19 14:07|2280981
    192.168.10.110|8081|2015-09-18 14:02|2015-09-18 14:07|2290981
    192.168.10.110|8081|2015-09-17 14:02|2015-09-17 14:07|2210982
    192.168.10.110|8081|2015-09-16 14:02|2015-09-16 14:07|2210983
    192.168.10.110|8081|2015-09-15 14:02|2015-09-15 14:07|2210984
    192.168.10.110|8081|2015-09-14 14:02|2015-09-14 14:07|2210985
    192.168.10.110|8081|2015-09-13 14:02|2015-09-13 14:07|2210986
    192.168.10.110|8081|2015-09-12 14:02|2015-09-12 14:07|2210987
    192.168.10.110|8081|2015-09-11 14:02|2015-09-11 14:07|2210988
    192.168.10.110|8081|2015-09-10 14:02|2015-09-10 14:07|2110989
    192.168.10.110|8081|2015-09-09 14:02|2015-09-09 14:07|2210981
    192.168.10.110|8081|2015-09-07 14:02|2015-09-07 14:07|2310981
    192.168.10.110|8081|2015-09-06 14:02|2015-09-06 14:07|2410981
    192.168.10.110|8081|2015-09-05 14:02|2015-09-05 14:07|2510981
    

      

  • 相关阅读:
    英语长难句
    服务器部署 halo博客项目
    11月迟来的总结
    10月总结
    9月总结
    python根据字符串导入模块
    RestFul(番外):类视图更适合restful
    Django-基础 Meta自定义
    (垃圾代码)修改同目录下面的xml文件标签数值
    Django-templatetags设置(在templates中使用自定义变量)
  • 原文地址:https://www.cnblogs.com/qq27271609/p/4883086.html
Copyright © 2020-2023  润新知