• Spark MLlib之使用Breeze操作矩阵向量


    在下面的练习中,需要自行将 Spark 的 jar 包(其中包含 Breeze)添加到项目依赖中。

    1. Spark MLlib 底层的向量、矩阵运算使用了 Breeze 库。

    ScalaNLP 是一套机器学习和数值计算的库。它主要面向科学计算(SC)、机器学习(ML)和自然语言处理(NLP)。它包括三个库:Breeze、Epic 和 Puck。

    Breeze :是机器学习和数值计算库,它是 Spark MLlib 的核心,包括线性代数、数值计算和优化,是一个通用、功能强大、高效的机器学习工具库。

    Epic     :是一个高性能统计解析器和结构化预测库

    Puck    :是一个快速GPU加速解析器

    在使用Breeze 库时,需要导入相关包:

    import breeze.linalg._
    
    import breeze.numerics._
    

      

    具体练习如下:

    package leaning
    
    import breeze.linalg._
    import breeze.numerics._
    import breeze.stats.distributions.Rand
    
    /**
      * Walk-through of the Breeze linear-algebra API: creation, indexing,
      * reshaping, element-wise arithmetic, reductions, boolean helpers,
      * linear-algebra routines and rounding functions.
      *
      * Every step prints its result (or keeps the `println` commented out) and the
      * output recorded from a previous run is kept next to it as a comment.
      * Outputs of the random generators differ on every run.
      *
      * Created by dy9776 on 2017/12/5.
      */
    object Practise_breeze {

      /**
        * Runs all demonstrations in sequence and prints the results to stdout.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {

        // ---------------------------------------------------------------
        // Creation
        // ---------------------------------------------------------------

        // All-zero matrix (3 rows, 2 columns)
        val matrix: DenseMatrix[Double] = DenseMatrix.zeros[Double](3, 2)
        println(matrix)
        /*
          0.0  0.0
          0.0  0.0
          0.0  0.0
        */

        // All-zero vector
        val testVector: DenseVector[Double] = DenseVector.zeros[Double](2)
        println(testVector)

        // All-one vector
        val allOneVector = DenseVector.ones[Double](2)
        println(allOneVector)

        // Vector filled with a constant: 3 elements, each equal to 2.0
        val haveNumberFill = DenseVector.fill[Double](3, 2)
        println(haveNumberFill)

        // Range vectors (start, end, step); the recorded outputs show that the
        // end bound is exclusive. These are NOT random vectors.
        val rangeNUm = DenseVector.range(1, 10, 2)   // DenseVector(1, 3, 5, 7, 9)
        val rangeNUmD = DenseVector.rangeD(1, 9, 2)  // DenseVector(1.0, 3.0, 5.0, 7.0)
        val rangeNUmF = DenseVector.rangeF(1, 7, 2)  // DenseVector(1.0, 3.0, 5.0)
        println(rangeNUm)
        println(rangeNUmD)
        println(rangeNUmF)

        // Identity matrix
        val unitMatrix = DenseMatrix.eye[Double](4)
        // println(unitMatrix)
        /*
          1.0  0.0  0.0  0.0
          0.0  1.0  0.0  0.0
          0.0  0.0  1.0  0.0
          0.0  0.0  0.0  1.0
        */

        // Diagonal matrix built from a vector
        val doubleVecoter = diag(DenseVector(3.0, 4.0, 5.0))
        // println(doubleVecoter)
        /*
          3.0  0.0  0.0
          0.0  4.0  0.0
          0.0  0.0  5.0
        */

        // Matrix built row by row (one tuple per row)
        val byRowCreateMatrix = DenseMatrix((4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
        // println(byRowCreateMatrix)
        /*
          4.0  5.0  6.0
          7.0  8.0  9.0
        */

        // Vector built from its elements. The values are passed as varargs, not
        // wrapped in a tuple: DenseVector((4.0, ...)) would create a one-element
        // vector whose single element is a Tuple6.
        val denseCreateVector = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
        // println(denseCreateVector) // DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

        // Vector transpose
        val vectorTranspostion = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0).t
        println(vectorTranspostion) // Transpose(DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

        // Vector built from a function of the index
        val funCreateVector = DenseVector.tabulate(5)(i => i * i)
        println(funCreateVector) // DenseVector(0, 1, 4, 9, 16)
        val funCreateVector2 = DenseVector.tabulate(0 to 5)(i => i * i)
        println(funCreateVector2) // DenseVector(0, 1, 4, 9, 16, 25)

        // Matrix built from a function of (row, column)
        val createFuncMatrix = DenseMatrix.tabulate(3, 4) {
          case (i, j) => i * i + j * j
        }
        // println(createFuncMatrix)
        /*
          0  1  4  9
          1  2  5  10
          4  5  8  13
        */

        // Matrix built from a flat array; the recorded output shows the array
        // is consumed column by column.
        val createFunctionMatrix = new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))
        // println(createFunctionMatrix)
        /*
          1.0  3.0
          4.0  6.0
          7.0  9.0
        */

        // Random vectors drawn uniformly from [0, 1); values differ on every run
        val formZeroToOneRandomVector = DenseVector.rand(9, Rand.uniform)
        println(formZeroToOneRandomVector)
        // e.g. DenseVector(0.7978222133507369, 0.48978247271729325, 0.24943434133065834, 0.6619447026155139, 0.5324942068054981, 0.9051865626036415, 0.5989291014099107, 0.9221881029987078, 0.17371486701192662)
        val formZeroToOneRandomVector2 = DenseVector.rand(9, Rand.uniform)
        println(formZeroToOneRandomVector2)
        // A second, independent sample: same shape, different values.

        // Random matrix drawn uniformly from [0, 1)
        val formZeroToOneRandomMatrix = DenseMatrix.rand(3, 2, Rand.uniform)
        println(formZeroToOneRandomMatrix)
        /*
          0.8036324612618653  0.538112087890035
          0.6864375371630702  0.3123993272549075
          0.9458628172312897  0.01137554621536796
        */

        // Random matrix drawn from a Gaussian distribution, so entries are
        // NOT restricted to [0, 1) despite the variable name.
        val formZeroToOneRandomMatrix2 = DenseMatrix.rand(3, 2, Rand.gaussian)
        println(formZeroToOneRandomMatrix2)
        /*
          0.9510499901472648   0.287812938654061
          -0.5266499883462216  0.9380426076781263
          -0.3959295333472151  -0.9057610233257112
        */

        // ---------------------------------------------------------------
        // Element access
        // ---------------------------------------------------------------

        val a = new DenseVector[Int](Array(1 to 20: _*))
        println(a) // DenseVector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

        // Single position
        println(a(0)) // 1

        // Sub-vectors (inclusive and exclusive upper bound)
        println(a(1 to 4))    // DenseVector(2, 3, 4, 5)
        println(a(1 until 4)) // DenseVector(2, 3, 4)

        // From a start position to the end (-1 addresses the last element)
        println(a(1 to -1)) // DenseVector(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

        // Slice with an explicit step; a negative step walks backwards
        println(a(5 to 0 by -1)) // DenseVector(6, 5, 4, 3, 2, 1)

        // Last element
        println(a(-1)) // 20

        val m = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
        println(m)
        /*
          1.0  2.0  3.0
          4.0  5.0  6.0
        */

        // Single position (row, column)
        println(m(0, 1)) // 2.0

        // One whole column
        println(m(::, 1)) // DenseVector(2.0, 5.0)

        // ---------------------------------------------------------------
        // Element manipulation
        // ---------------------------------------------------------------

        // Reshape 2x3 -> 3x2; the recorded output shows elements are
        // re-laid-out in column order.
        val justAdjustMatrix = m.reshape(3, 2)
        println(justAdjustMatrix)
        /*
          1.0  5.0
          4.0  3.0
          2.0  6.0
        */

        // Flatten a matrix into a vector (column order), and back into a 1xN matrix
        val toVector = m.toDenseVector
        println(toVector)               // DenseVector(1.0, 4.0, 2.0, 5.0, 3.0, 6.0)
        println(toVector.toDenseMatrix) // 1.0  4.0  2.0  5.0  3.0  6.0

        // Lower triangle. For the 2x3 matrix `m` the recorded result is 2x2:
        // only the leading rows x rows block is considered, so column 3 is dropped.
        println(lowerTriangular(m))
        /*
          1.0  0.0
          4.0  5.0
        */

        // Upper triangle (same 2x2 truncation as above)
        println(upperTriangular(m))
        /*
          1.0  2.0
          0.0  5.0
        */

        // Copy of the matrix; prints the full, unmodified 2x3 matrix
        println(m.copy)
        /*
          1.0  2.0  3.0
          4.0  5.0  6.0
        */

        // Diagonal elements. upperTriangular(m) is the 2x2 matrix shown above,
        // so its diagonal is (1.0, 5.0) -- not (1.0, 6.0).
        println(diag(upperTriangular(m)))
        // DenseVector(1.0, 5.0)

        // Assign a scalar to a slice (mutates `a` in place)
        println(a(1 to 4) := 5)
        // DenseVector(5, 5, 5, 5)

        // Assign a vector to a slice (mutates `a` in place)
        println(a(1 to 4) := DenseVector(1, 2, 3, 4))
        // DenseVector(1, 2, 3, 4)

        println(m)

        // Assign a scalar to a sub-matrix. Row indices must stay inside the
        // matrix: m(1 to 2, 1 to 2) := 0.0 fails on this 2-row matrix with
        //   java.lang.IndexOutOfBoundsException: Row slice of Range(1, 2) was bigger than matrix rows of 2
        println("-==========m1================-")
        println(m(0 to 1, 1 to 2) := 0.0)
        println("-==========m================-")
        println(m)
        println("-==========m end================-")
        /*
          -==========m1================-
          0.0  0.0
          0.0  0.0
          -==========m================-
          1.0  0.0  0.0
          4.0  0.0  0.0
          -==========m end================-
        */

        // Assign a scalar to a whole column (mutates `m` in place)
        val re = m(::, 2) := 5.0
        println(re.toDenseMatrix)
        // 5.0  5.0

        val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
        val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))

        // Vertical concatenation of matrices
        val verticalLike = DenseMatrix.vertcat(a1, a2)
        println(verticalLike)
        println("-==========================-")
        /*
          1.0   2.0   3.0
          4.0   5.0   6.0
          7.0   8.0   9.0
          10.0  11.0  12.0
        */

        // Horizontal concatenation of matrices
        val twoMatrixConn = DenseMatrix.horzcat(a1, a2)
        println(twoMatrixConn)
        println("-==========================-")
        /*
          1.0  2.0  3.0  7.0   8.0   9.0
          4.0  5.0  6.0  10.0  11.0  12.0
        */

        // Concatenation of vectors: vertcat appends, horzcat makes each
        // vector one column of a matrix.
        val connnectVector1 = DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
        val connnectVector2 = DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))

        println(connnectVector1) // DenseVector(20, 21, 22, 23, 24, 25)
        println(connnectVector2)
        /*
          20  23
          21  24
          22  25
        */

        // ---------------------------------------------------------------
        // Numeric functions
        // NOTE(review): newer Breeze releases appear to spell the element-wise
        // operators `*:*`, `/:/`, `<:<`, `:==`, `&:&`, `|:|` and deprecate the
        // `:*`-style forms used below -- confirm against the Breeze version
        // bundled with your Spark distribution.
        // ---------------------------------------------------------------

        // Element-wise addition
        println(a1 + a2)
        /*
          8.0   10.0  12.0
          14.0  16.0  18.0
        */

        // Element-wise multiplication
        println(a1 :* a2)
        /*
          7.0   16.0  27.0
          40.0  55.0  72.0
        */

        // Element-wise division
        println(a1 :/ a2)
        /*
          0.14285714285714285  0.25                 0.3333333333333333
          0.4                  0.45454545454545453  0.5
        */

        // Element-wise less-than comparison
        println(a1 :< a2)
        /*
          true  true  true
          true  true  true
        */

        // Element-wise equality
        println(a1 :== a2)
        /*
          false  false  false
          false  false  false
        */

        // In-place addition of a scalar (mutates a1)
        println(a1 :+= 2.0)
        /*
          3.0  4.0  5.0
          6.0  7.0  8.0
        */

        // In-place multiplication by a scalar (mutates a1 again)
        println(a1 :*= 2.0)
        /*
          6.0   8.0   10.0
          12.0  14.0  16.0
        */

        // Vector dot product
        val vectorDot = DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)
        println(vectorDot) // 10

        // Largest element (a1 was mutated by the two in-place operations above)
        println(max(a1)) // 16.0

        // Smallest element
        println(min(a1)) // 6.0

        // Position of the largest element
        println(argmax(a1)) // (1,2)

        // Position of the smallest element
        println(argmin(a1)) // (0,0)

        // ---------------------------------------------------------------
        // Summation functions
        // ---------------------------------------------------------------

        val m1 = DenseMatrix((1.0, 2.0, 3.0, 4.0), (5.0, 6.0, 7.0, 8.0), (9.0, 10.0, 11.0, 12.0))
        println(m1)
        /*
          1.0  2.0   3.0   4.0
          5.0  6.0   7.0   8.0
          9.0  10.0  11.0  12.0
        */

        println("-==========================-")
        // Sum of all elements
        println(sum(m1)) // 78.0

        // Sum of every column
        println(sum(m1, Axis._0)) // 15.0  18.0  21.0  24.0

        // Sum of every row
        println(sum(m1, Axis._1)) // DenseVector(10.0, 26.0, 42.0)

        // Sum of the diagonal: lowerTriangular(m1) keeps the diagonal 1, 6, 11
        println(trace(lowerTriangular(m1))) // 18.0

        // Cumulative sum
        val a3 = new DenseVector[Int](Array(10 to 20: _*))
        println(accumulate(a3)) // DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)

        // ---------------------------------------------------------------
        // Boolean functions
        // ---------------------------------------------------------------

        val c = DenseVector(true, false, true)
        val d = DenseVector(false, true, true)

        // Element-wise AND
        println(c :& d) // DenseVector(false, false, true)

        // Element-wise OR
        println(c :| d) // DenseVector(true, true, true)

        // Element-wise NOT
        println(!c) // DenseVector(false, true, false)

        val e = DenseVector[Int](-3, 0, 2)

        // True if any element is non-zero
        println(any(e)) // true

        // True only if all elements are non-zero
        println(all(e)) // false

        // ---------------------------------------------------------------
        // Linear-algebra functions
        // ---------------------------------------------------------------

        val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
        val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

        // Linear solve: find X such that f * X = g, written `f \ g`.
        // `f` is numerically singular (see det(f) below), so the recorded
        // solution is only one of many and is sensitive to rounding.
        println(f \ g)
        /*
          -2.5  -2.5  -2.5
          4.0   4.0   4.0
          -1.5  -1.5  -1.5
        */

        // Transpose
        println(f.t)
        /*
          1.0  4.0  7.0
          2.0  5.0  8.0
          3.0  6.0  9.0
        */

        // Determinant (not the eigenvalues); effectively zero for this matrix
        println(det(f)) // 6.661338147750939E-16

        // Inverse. Because det(f) is ~0 the huge entries below are numerical
        // noise, not a meaningful inverse.
        println(inv(f))
        /*
          -4.503599627370499E15  9.007199254740992E15    -4.503599627370495E15
          9.007199254740998E15   -1.8014398509481984E16  9.007199254740991E15
          -4.503599627370498E15  9.007199254740992E15    -4.5035996273704955E15
        */

        // Pseudo-inverse
        println(pinv(f))
        /*
          -3.7720834019330525E14  7.544166803866101E14    -3.77208340193305E14
          7.544166803866094E14    -1.5088333607732208E15  7.544166803866108E14
          -3.772083401933041E14   7.544166803866104E14    -3.772083401933055E14
        */

        // Eigenvalues and eigenvectors
        println(eig(f))
        /*
          Eig(DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),DenseVector(0.0, 0.0, 0.0),-0.23197068724628617  -0.7858302387420671   0.40824829046386363
              -0.5253220933012336   -0.08675133925662833  -0.816496580927726
              -0.8186734993561815   0.61232756022881      0.4082482904638625
              )
        */

        // Singular value decomposition, destructured into its three factors
        val svd.SVD(u, s, v) = svd(g)
        println(u)
        /*
          -0.5773502691896255  -0.5773502691896257  -0.5773502691896256
          -0.5773502691896256  -0.2113248654051871  0.7886751345948126
          -0.5773502691896256  0.7886751345948129   -0.21132486540518708
        */
        println("==============================")
        println(s) // DenseVector(3.0000000000000004, 0.0, 0.0)
        println("==============================")
        println(v)
        /*
          -0.5773502691896256  -0.5773502691896257  -0.5773502691896256
          0.0                  -0.7071067811865474  0.7071067811865477
          0.816496580927726    -0.4082482904638629  -0.4082482904638628
        */

        // Rank of the matrix
        println(rank(f)) // 2

        // Total number of elements
        println(f.size) // 9

        // Number of rows
        println(f.rows) // 3

        // Number of columns
        println(f.cols) // 3

        // ---------------------------------------------------------------
        // Rounding functions
        // ---------------------------------------------------------------

        val h = DenseVector(-1.2, 0.7, 2.3) // DenseVector(-1.2, 0.7, 2.3)

        // Round to nearest (result is a vector of Long)
        println(round(h)) // DenseVector(-1, 1, 2)

        // Smallest integer greater than or equal to each element
        println(ceil(h)) // DenseVector(-1.0, 1.0, 3.0)

        // Largest integer less than or equal to each element
        println(floor(h)) // DenseVector(-2.0, 0.0, 2.0)

        // Sign function
        println(signum(h)) // DenseVector(-1.0, 1.0, 1.0)

        // Absolute value
        println(abs(h)) // DenseVector(1.2, 0.7, 2.3)
      }

    }
    

      

  • 相关阅读:
    观察者模式(Observer)
    怎样解决Java Web项目更改项目名后报错
    MAVEN最佳实践:模块划分
    java.lang.OutOfMemoryError: PermGen space及其解决方法
    以Windows服务方式启动MySQL,并将其默认编码设置为UTF-8
    ubuntu 12.04和Windows 7双系统的安装方法
    允许ubuntu下mysql远程连接
    Linux 系统目录介绍
    SVN中图标符号的含义
    简单介绍Linux下安装Tomcat的步骤
  • 原文地址:https://www.cnblogs.com/nucdy/p/7988959.html
Copyright © 2020-2023  润新知