• numpy


    数据类型

    import numpy as np
    import random
    
    # Three ways to build a 1-D ndarray: from a list, from a range, and with np.arange.
    t1 = np.array([1, 2, 3])
    print(t1, type(t1))     # [1 2 3] <class 'numpy.ndarray'>

    t2 = np.array(range(5))
    print(t2, type(t2))     # [0 1 2 3 4] <class 'numpy.ndarray'>

    t3 = np.arange(5)       # np.arange takes range()-style arguments
    print(t3, type(t3))     # [0 1 2 3 4] <class 'numpy.ndarray'>
    print(t3.dtype)         # int64 in the recorded run

    # Element type chosen explicitly through dtype.
    t4 = np.array(range(1, 4), dtype=np.float64)
    print(t4, t4.dtype)     # [1. 2. 3.] float64

    # Boolean dtype: non-zero values become True, zero becomes False.
    t5 = np.array([1, 0, 0, 1, 0], dtype=np.bool_)
    print(t5, t5.dtype)     # [ True False False  True False] bool

    # astype returns a converted copy with the requested dtype.
    t6 = t5.astype(np.int8)
    print(t6, t6.dtype)     # [1 0 0 1 0] int8

    # Ten random floats drawn with the standard-library generator.
    t7 = np.array([random.random() for _ in range(10)])
    print(t7, t7.dtype)     # ten values in [0, 1), dtype float64

    # Round every element to two decimal places.
    t8 = t7.round(2)
    print(t8, t8.dtype)     # e.g. [0.85 0.92 0.44 0.53 0.44 0.06 0.54 0.04 0.71 0.94] float64

    # Formatting a plain Python float to two decimals yields a str, not a float.
    t9 = format(random.random(), '.2f')
    print(t9, type(t9))     # e.g. 0.65 <class 'str'>
    
    In [1]: import numpy as np
    
    In [2]: t1 = np.arange(12)
    
    In [3]: t1
    Out[3]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
    
    In [4]: t1.shape    # 查看数组形状
    Out[4]: (12,)       # array() 参数是一层列表时得到一维数组, t1.shape 元组只有一个值, 12 为元素个数
    
    In [5]: t2 = np.array([[1,2,3],[4,5,6]])
    
    In [6]: t2
    Out[6]:
    array([[1, 2, 3],
           [4, 5, 6]])
    
    In [7]: t2.shape
    Out[7]: (2, 3)      # array() 参数是嵌套两层的列表时得到二维数组, t2.shape 元组有两个值 (2, 3): 2 为行数, 3 为列数
    
    In [8]: t3 = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
    
    In [9]: t3
    Out[9]:
    array([[[ 1,  2,  3],
            [ 4,  5,  6]],
    
           [[ 7,  8,  9],
            [10, 11, 12]]])
    
    In [10]: t3.shape   # t3为三维数组,t3.shape元组有三个值
    Out[10]: (2, 2, 3)
    
    In [11]: t4 = np.arange(12)
    
    In [12]: t4
    Out[12]: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
    
    In [13]: t4.reshape((3,4))      # 修改数组形状,变成3行4列
    Out[13]:
    array([[ 0,  1,  2,  3],
           [ 4,  5,  6,  7],
           [ 8,  9, 10, 11]])
    
    In [14]: t4.reshape((3,5))
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-14-7634752f2bda> in <module>
    ----> 1 t4.reshape((3,5))
    
    ValueError: cannot reshape array of size 12 into shape (3,5)
    
    
    In [15]: t5 = np.arange(24).reshape((2,3,4))    # (2,3,4):2块,3行,4列
    
    In [16]: t5
    Out[16]:
    array([[[ 0,  1,  2,  3],
            [ 4,  5,  6,  7],
            [ 8,  9, 10, 11]],
    
           [[12, 13, 14, 15],
            [16, 17, 18, 19],
            [20, 21, 22, 23]]])
    
    
    In [17]: t5.reshape((4,6))
    Out[17]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [18]: t5
    Out[18]:
    array([[[ 0,  1,  2,  3],
            [ 4,  5,  6,  7],
            [ 8,  9, 10, 11]],
    
           [[12, 13, 14, 15],
            [16, 17, 18, 19],
            [20, 21, 22, 23]]])
    
    In [19]: t5 = t5.reshape((4,6))
    
    In [20]: t5
    Out[20]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [21]: t5.reshape((24,))
    Out[21]:
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 19, 20, 21, 22, 23])
    
    In [22]: t5.reshape((24,1))
    Out[22]:
    array([[ 0],
           [ 1],
           [ 2],
           [ 3],
           [ 4],
           [ 5],
           [ 6],
           [ 7],
           [ 8],
           [ 9],
           [10],
           [11],
           [12],
           [13],
           [14],
           [15],
           [16],
           [17],
           [18],
           [19],
           [20],
           [21],
           [22],
           [23]])
    
    In [23]: t5.reshape((1,24))
    Out[23]:
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23]])
    
    In [24]: t6 = t5.reshape((t5.shape[0]*t5.shape[1],))    # t5.shape[0]:t5行数,t5.shape[1]:t5列数
    
    In [25]: t6
    Out[25]:
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 19, 20, 21, 22, 23])
    
    In [26]: t5
    Out[26]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [27]: t5.flatten()       # t5.flatten()数据转为一维数组
    Out[27]:
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 19, 20, 21, 22, 23])
    
    In [28]: t5
    Out[28]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [29]: t5+2
    Out[29]:
    array([[ 2,  3,  4,  5,  6,  7],
           [ 8,  9, 10, 11, 12, 13],
           [14, 15, 16, 17, 18, 19],
           [20, 21, 22, 23, 24, 25]])
    
    In [30]: t5*2
    Out[30]:
    array([[ 0,  2,  4,  6,  8, 10],
           [12, 14, 16, 18, 20, 22],
           [24, 26, 28, 30, 32, 34],
           [36, 38, 40, 42, 44, 46]])
    
    In [31]: t5/2
    Out[31]:
    array([[ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5],
           [ 3. ,  3.5,  4. ,  4.5,  5. ,  5.5],
           [ 6. ,  6.5,  7. ,  7.5,  8. ,  8.5],
           [ 9. ,  9.5, 10. , 10.5, 11. , 11.5]])
    
    In [32]: t5/0
    /Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: divide by zero encountered in true_divide
      #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
    /Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: invalid value encountered in true_divide
      #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
    Out[32]:            # 0/0 得到 nan(不是一个数), 非零数/0 得到 inf(+∞)
    array([[nan, inf, inf, inf, inf, inf],
           [inf, inf, inf, inf, inf, inf],
           [inf, inf, inf, inf, inf, inf],
           [inf, inf, inf, inf, inf, inf]])
    
    In [33]: t5
    Out[33]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [34]: t6 = np.arange(100,124).reshape((4,6))
    
    In [35]: t6
    Out[35]:
    array([[100, 101, 102, 103, 104, 105],
           [106, 107, 108, 109, 110, 111],
           [112, 113, 114, 115, 116, 117],
           [118, 119, 120, 121, 122, 123]])
    
    In [36]: t5+t6
    Out[36]:
    array([[100, 102, 104, 106, 108, 110],
           [112, 114, 116, 118, 120, 122],
           [124, 126, 128, 130, 132, 134],
           [136, 138, 140, 142, 144, 146]])
    
    In [37]: t5*t6
    Out[37]:
    array([[   0,  101,  204,  309,  416,  525],
           [ 636,  749,  864,  981, 1100, 1221],
           [1344, 1469, 1596, 1725, 1856, 1989],
           [2124, 2261, 2400, 2541, 2684, 2829]])
    
    In [38]: t6/t5
    /Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/ipython:1: RuntimeWarning: divide by zero encountered in true_divide
      #!/Users/xyp/opt/anaconda3/envs/DataAnalysis/bin/python
    Out[38]:
    array([[         inf, 101.        ,  51.        ,  34.33333333,
             26.        ,  21.        ],
           [ 17.66666667,  15.28571429,  13.5       ,  12.11111111,
             11.        ,  10.09090909],
           [  9.33333333,   8.69230769,   8.14285714,   7.66666667,
              7.25      ,   6.88235294],
           [  6.55555556,   6.26315789,   6.        ,   5.76190476,
              5.54545455,   5.34782609]])
    
    In [39]: t7 = np.arange(0,6)
    
    In [40]: t7
    Out[40]: array([0, 1, 2, 3, 4, 5])
    
    In [41]: t5
    Out[41]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [42]: t5-t7
    Out[42]:
    array([[ 0,  0,  0,  0,  0,  0],
           [ 6,  6,  6,  6,  6,  6],
           [12, 12, 12, 12, 12, 12],
           [18, 18, 18, 18, 18, 18]])
    
    In [43]: t8 = np.arange(4).reshape((4,1))
    
    In [44]: t8
    Out[44]:
    array([[0],
           [1],
           [2],
           [3]])
    
    In [45]: t5-t8
    Out[45]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 5,  6,  7,  8,  9, 10],
           [10, 11, 12, 13, 14, 15],
           [15, 16, 17, 18, 19, 20]])
    
    In [46]: t9 = np.arange(10)
    
    In [47]: t9
    Out[47]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    
    In [48]: t5
    Out[48]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [49]: t5-t9
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-49-dcf9c8dd3788> in <module>
    ----> 1 t5-t9
    
    ValueError: operands could not be broadcast together with shapes (4,6) (10,)
    

      

    文件读取

    import numpy as np
    
    us_file_path = './youtube_video_data/US_video_data_numbers.csv'

    # np.loadtxt parameters used in these notes:
    #   delimiter - field separator; ',' for CSV files
    #   dtype     - element type of the result (float by default, so large values print in scientific notation)
    #   unpack    - False (default) keeps one file row per array row; True returns the transposed array
    #   skiprows  - number of leading lines to skip
    #   usecols   - index, or tuple of indices, of the columns to read
    # dtype=int replaces the np.int alias, which NumPy deprecated in 1.20 and removed in 1.24.
    t1 = np.loadtxt(us_file_path, delimiter=',', dtype=int)
    print(t1, '\n', '*'*100)
    t2 = np.loadtxt(us_file_path, delimiter=',', dtype=int, unpack=True)
    print(t2)
    
    [[4394029  320053    5931   46245]
     [7860119  185853   26679       0]
     [5845909  576597   39774  170708]
     ...
     [ 142463    4231     148     279]
     [2162240   41032    1384    4737]
     [ 515000   34727     195    4722]] 
     ****************************************************************************************************
    [[4394029 7860119 5845909 ...  142463 2162240  515000]
     [ 320053  185853  576597 ...    4231   41032   34727]
     [   5931   26679   39774 ...     148    1384     195]
     [  46245       0  170708 ...     279    4737    4722]]
    

     

    转置三种方法和简单运算

    # 转置三种方法
    
    In [2]: import numpy as np
    
    In [3]: t2 = np.arange(24).reshape((4,6))
    
    In [4]: t2
    Out[4]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [6]: t2.transpose()          # 转置
    Out[6]:
    array([[ 0,  6, 12, 18],
           [ 1,  7, 13, 19],
           [ 2,  8, 14, 20],
           [ 3,  9, 15, 21],
           [ 4, 10, 16, 22],
           [ 5, 11, 17, 23]])
    
    In [7]: t2.T                    # 转置
    Out[7]:
    array([[ 0,  6, 12, 18],
           [ 1,  7, 13, 19],
           [ 2,  8, 14, 20],
           [ 3,  9, 15, 21],
           [ 4, 10, 16, 22],
           [ 5, 11, 17, 23]])
    
    In [8]: t2.swapaxes(1,0)        # 交换轴
    Out[8]:
    array([[ 0,  6, 12, 18],
           [ 1,  7, 13, 19],
           [ 2,  8, 14, 20],
           [ 3,  9, 15, 21],
           [ 4, 10, 16, 22],
           [ 5, 11, 17, 23]])
    
    In [9]: t2
    Out[9]:
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [10]: t2<10
    Out[10]:
    array([[ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True, False, False],
           [False, False, False, False, False, False],
           [False, False, False, False, False, False]])
    
    In [11]: t2[t2<10]=3
    
    In [12]: t2
    Out[12]:
    array([[ 3,  3,  3,  3,  3,  3],
           [ 3,  3,  3,  3, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [13]: t2[t2>20]
    Out[13]: array([21, 22, 23])
    
    In [16]: t2
    Out[16]:
    array([[ 3,  3,  3,  3,  3,  3],
           [ 3,  3,  3,  3, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    
    In [17]: np.where(t2<=3,100,300)            # numpy三元运算符,t2 = 100 if t2<=3 else 300
    Out[17]:
    array([[100, 100, 100, 100, 100, 100],
           [100, 100, 100, 100, 300, 300],
           [300, 300, 300, 300, 300, 300],
           [300, 300, 300, 300, 300, 300]])
    
    In [19]: t = np.arange(20)
    
    In [20]: t
    Out[20]:
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 19])
    
    In [21]: t.clip(10,18)                      # clip(10,18),小于10的替换成10,大于18的替换成18
    Out[21]:
    array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 18])
    
    In [22]: t[2]=np.nan
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-22-1aa5d7dd59fe> in <module>
    ----> 1 t[2]=np.nan
    
    ValueError: cannot convert float NaN to integer
    
    In [23]: t=t.astype(float)
    
    In [24]: t[2]=np.nan
    
    In [25]: t
    Out[25]:
    array([ 0.,  1., nan,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
           13., 14., 15., 16., 17., 18., 19.]) 
    

      

    索引和切片

    import numpy as np
    
    us_file_path = './youtube_video_data/US_video_data_numbers.csv'
    uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

    # np.loadtxt parameters used in these notes:
    #   delimiter - field separator; ',' for CSV files
    #   dtype     - element type of the result (float by default, so large values print in scientific notation)
    #   unpack    - False (default) keeps one file row per array row; True returns the transposed array
    #   skiprows  - number of leading lines to skip
    #   usecols   - index, or tuple of indices, of the columns to read
    # dtype=int replaces the np.int alias, which NumPy deprecated in 1.20 and removed in 1.24.
    t1 = np.loadtxt(us_file_path, delimiter=',', dtype=int)
    print(t1, '\n', '*'*100)
    t2 = np.loadtxt(us_file_path, delimiter=',', dtype=int, unpack=True)
    # print(t2)
    # A single row:
    # print(t1[2])
    # Row 2 through the last row:
    # print(t1[2:])
    # Selected rows; 2, 8 and 10 are row indices.
    print(t1[[2, 8, 10]], '\n', '*'*100)
    # Rows and columns together: the part before the comma selects rows, the part after selects columns.
    # Examples: [1, :], [2, :], [[2, 3, 10], :], [2, [0, 2]]
    # print(t1[1, :])
    # print(t1[2, :])
    # print(t1[[2, 3, 10], :])
    # print(t1[2, [0, 2]])
    print(t1[2:5, 1:4], '\n', '*'*100)
    # Two index lists are paired element-wise: this does NOT select rows 0, 2, 3 crossed with
    # columns 0, 1, 3; it picks the three elements at (0, 0), (2, 1) and (3, 3).
    print(t1[[0, 2, 3], [0, 1, 3]])
    
    
    [[4394029  320053    5931   46245]
     [7860119  185853   26679       0]
     [5845909  576597   39774  170708]
     ...
     [ 142463    4231     148     279]
     [2162240   41032    1384    4737]
     [ 515000   34727     195    4722]] 
     ****************************************************************************************************
    [[5845909  576597   39774  170708]
     [1338533   69687     678    5643]
     [ 859289   34485     726    1914]] 
     ****************************************************************************************************
    [[576597  39774 170708]
     [ 24975   4542  12829]
     [ 96666    568   6666]] 
     ****************************************************************************************************
    [4394029  576597   12829] 
    

      

    数组的拼接

    In [31]: t1
    Out[31]:
    array([[0, 1, 2, 3],
           [4, 5, 6, 7]])
    
    In [32]: t2
    Out[32]:
    array([[ 8,  9, 10, 11],
           [12, 13, 14, 15]])
    
    In [33]: np.vstack((t1,t2))     # 竖直拼接
    Out[33]:
    array([[ 0,  1,  2,  3],
           [ 4,  5,  6,  7],
           [ 8,  9, 10, 11],
           [12, 13, 14, 15]])
    
    In [34]: np.hstack((t1,t2))     # 水平拼接
    Out[34]:
    array([[ 0,  1,  2,  3,  8,  9, 10, 11],
           [ 4,  5,  6,  7, 12, 13, 14, 15]])
     
    
    数组的行列交换
    In [39]: t = np.arange(12,24).reshape(3,4)
    
    In [40]: t
    Out[40]:
    array([[12, 13, 14, 15],
           [16, 17, 18, 19],
           [20, 21, 22, 23]])
    
    In [41]: t[[1,2],:] = t[[2,1],:]    # 行交换
    
    In [42]: t
    Out[42]:
    array([[12, 13, 14, 15],
           [20, 21, 22, 23],
           [16, 17, 18, 19]])
    
    In [43]: t[:,[0,2]] = t[:,[2,0]]    # 列交换
    
    In [44]: t
    Out[44]:
    array([[14, 13, 12, 15],
           [22, 21, 20, 23],
           [18, 17, 16, 19]])
    

      

    numpy中nan和常用方法

    In [39]: np.nan == np.nan
    Out[39]: False
    
    In [40]: np.nan != np.nan
    Out[40]: True
    
    In [49]: t
    Out[49]:
    array([[0., 1., 2., 3., 4.],
           [5., 6., 7., 8., 9.]])
    
    In [50]: t[:,0] = 0
    
    In [51]: t
    Out[51]:
    array([[0., 1., 2., 3., 4.],
           [0., 6., 7., 8., 9.]])
    
    In [52]: np.count_nonzero(t)        # 统计t数组中非0元素的个数
    Out[52]: 8
    
    In [56]: t[:,0] = np.nan
    
    In [57]: t
    Out[57]:
    array([[nan,  1.,  2.,  3.,  4.],
           [nan,  6.,  7.,  8.,  9.]])
    
    In [58]: t != t             # 当t!=t时,数组中为nan时为True
    Out[58]:
    array([[ True, False, False, False, False],
           [ True, False, False, False, False]])
    
    In [59]: np.count_nonzero(t!=t)     # 数组t中为nan的个数
    Out[59]: 2
    
    In [60]: np.isnan(t)                # 数组中为nan时为True
    Out[60]:
    array([[ True, False, False, False, False],
           [ True, False, False, False, False]])
    
    In [61]: np.count_nonzero(np.isnan(t))      # 数组t中为nan的个数
    Out[61]: 2
    
    In [62]: np.sum(t)      # nan和任何值的计算都为nan
    Out[62]: nan
    
    In [63]: np.sum(t,axis=0)       # sum(t,axis=0) 列相加结果
    Out[63]: array([nan,  7.,  9., 11., 13.])
    
    In [64]: np.sum(t,axis=1)       # sum(t,axis=1) 行相加结果
    Out[64]: array([nan, nan])
    

      

    numpy中常用统计方法

    In [75]: t
    Out[75]:
    array([[0, 1, 2, 3, 4],
           [5, 6, 7, 8, 9]])
    
    In [76]: t.sum(axis=0)
    Out[76]: array([ 5,  7,  9, 11, 13])
    
    In [77]: t.sum(axis=1)
    Out[77]: array([10, 35])
    
    In [78]: t.mean(axis=0)
    Out[78]: array([2.5, 3.5, 4.5, 5.5, 6.5])
    
    In [79]: t.mean(axis=1)     # 均值;mean 不会忽略 nan(含 nan 的行/列结果为 nan),处理缺失值时常用该行/列非 nan 元素的均值替换 nan
    Out[79]: array([2., 7.])
    
    In [80]: np.median(t)       # 中值
    Out[80]: 4.5
    
    In [81]: np.median(t,axis=0)
    Out[81]: array([2.5, 3.5, 4.5, 5.5, 6.5])
    
    In [82]: np.median(t,axis=1)
    Out[82]: array([2., 7.])
    
    In [83]: t.max()
    Out[83]: 9
    
    In [84]: t.max(axis=0)
    Out[84]: array([5, 6, 7, 8, 9])
    
    In [85]: t.max(axis=1)
    Out[85]: array([4, 9])
    
    In [86]: t.min(axis=1)
    Out[86]: array([0, 5])
    
    In [87]: np.ptp(t,axis=0)       # 极差,最大值和最小值之差
    Out[87]: array([5, 5, 5, 5, 5])
    
    In [88]: np.ptp(t,axis=1)
    Out[88]: array([4, 4])
    
    In [92]: t
    Out[92]:
    array([[0, 1, 2, 3, 4],
           [5, 6, 7, 8, 9]])
    
    In [93]: t.std()            # 标准差
    Out[93]: 2.8722813232690143
    
    In [94]: t.std(axis=0)
    Out[94]: array([2.5, 2.5, 2.5, 2.5, 2.5])
    
    In [95]: t.std(axis=1)
    Out[95]: array([1.41421356, 1.41421356])
    

       

    import numpy as np
    
    us_file_path = './youtube_video_data/US_video_data_numbers.csv'
    uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

    # Read each country's table as integers.
    us_data = np.loadtxt(us_file_path, dtype='int', delimiter=',')
    uk_data = np.loadtxt(uk_file_path, dtype='int', delimiter=',')

    # Country label columns, one entry per data row: 0 marks US, 1 marks UK.
    zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
    ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

    # Append the label column on the right of each table.
    us_data = np.hstack([us_data, zeros_data])
    uk_data = np.hstack([uk_data, ones_data])

    # Stack the US rows on top of the UK rows; the last column tells them apart.
    final_data = np.vstack([us_data, uk_data])
    print(final_data)
    
    # 把二个国家数据拼接一起研究分析数据
    import numpy as np
    
    us_file_path = './youtube_video_data/US_video_data_numbers.csv'
    uk_file_path = './youtube_video_data/GB_video_data_numbers.csv'

    # Read each country's table as integers.
    us_data = np.loadtxt(us_file_path, dtype='int', delimiter=',')
    uk_data = np.loadtxt(uk_file_path, dtype='int', delimiter=',')

    # Country label columns, one entry per data row: 0 marks US, 1 marks UK.
    zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
    ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

    # Append the label column on the right of each table.
    us_data = np.hstack([us_data, zeros_data])
    uk_data = np.hstack([uk_data, ones_data])

    # Stack the US rows on top of the UK rows; the last column tells them apart.
    final_data = np.vstack([us_data, uk_data])
    print(final_data)
    把二个国家数据拼接
    # numpy更多方法
    In [1]: import numpy as np
    
    In [2]: np.ones((3,4))      # 创建全为1的数组
    Out[2]:
    array([[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]])
    
    In [3]: np.zeros((3,4))     # 创建全为0的数组
    Out[3]:
    array([[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]])
    
    In [4]: np.eye(5)           # 创建对角线全为1的正方形数组
    Out[4]:
    array([[1., 0., 0., 0., 0.],
           [0., 1., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 1., 0.],
           [0., 0., 0., 0., 1.]])
    
    In [5]: t = np.eye(5)
    
    In [6]: np.argmax(t,axis=0)         # 获取最大值位置
    Out[6]: array([0, 1, 2, 3, 4])
    
    In [7]: t[t==1] = -1
    
    In [8]: t
    Out[8]:
    array([[-1.,  0.,  0.,  0.,  0.],
           [ 0., -1.,  0.,  0.,  0.],
           [ 0.,  0., -1.,  0.,  0.],
           [ 0.,  0.,  0., -1.,  0.],
           [ 0.,  0.,  0.,  0., -1.]])
    
    In [9]: np.argmin(t,axis=1)         # 获取最小值位置
    Out[9]: array([0, 1, 2, 3, 4])
    

      

    numpy随机方法

    In [12]: np.random.rand(2,3)        # 创建二行三列、在[0, 1)上均匀分布的浮点型数组
    Out[12]:
    array([[0.06364283, 0.91082238, 0.78795567],
           [0.0627046 , 0.33476692, 0.5778516 ]])
    
    In [13]: np.random.randn(2,3)       # 创建二行三列的标准正态分布(平均数为0、标准差为1)浮点型数组
    Out[13]:
    array([[ 0.06391798,  0.19011529, -0.17431257],
           [-0.45543116, -0.02290774,  0.11979098]])
    
    In [14]: np.random.randint(0,100,(2,3))     # 创建二行三列的随机整数数组,最低为0,最高为99(不含100)
    Out[14]:
    array([[13, 98, 78],
           [36, 59, 97]])
    
    In [20]: np.random.uniform(0,5,(2,3))       # 创建二行三列、在[0, 5)上均匀分布的随机浮点数数组
    Out[20]:
    array([[4.95142868, 1.39926247, 4.21451073],
           [2.25316875, 2.67873448, 2.84466319]])
    
    
    # Seeding the global generator makes every run produce the same random numbers;
    # the argument is the seed value.
    np.random.seed(0)
    t = np.random.randint(low=0, high=10, size=(2, 3))
    print(t)
    

      

  • 相关阅读:
    MySQL 数据类型
    MySQL的相关概念介绍
    遍历Map的四种方法
    Hadoop在win7下部署的问题
    Hbase之shell操作
    问题-"Record not found or changed by another user"
    问题-Delphi编译到最后Linking时总是出现与ntdll.dll有关的错误还有Fatal Error Out of memory错误
    教程-CXGRID之cxDropDownEdit密密
    问题-delphi在某电脑(win7)上是界面超乱 DPL
    教程-Delphi调用C# WEBSERVICE(二)
  • 原文地址:https://www.cnblogs.com/xuyaping/p/13565077.html
Copyright © 2020-2023  润新知