• numpy模块、matplotlib模块、pandas模块


    1. numpy模块

    • numpy模块的作用

      用于数据分析:对 numpy 数组(既有行又有列,即矩阵)进行科学计算

    • 实例

      # Element-wise product of two plain Python lists: needs an explicit O(n) pass.
      lt1 = [1, 2, 3]  # n elements
      lt2 = [4, 5, 6]

      # Walk both lists in lockstep and multiply the paired values.
      lt = [left * right for left, right in zip(lt1, lt2)]

      print(lt)

      import numpy as np  # conventional alias

      # The same product with NumPy arrays: `*` already works element by element.
      arr1 = np.array([1, 2, 3])
      arr2 = np.array([4, 5, 6])
      print(arr1 * arr2)
      
      # GPU --> graphics card

      # Creating NumPy arrays --> the resulting arrays are mutable

      # One-dimensional array (not the focus of these notes)
      arr = np.array([1, 2, 4])
      print(type(arr), arr)

      # Two-dimensional array (the important case)
      arr = np.array([
          [1, 2, 3],
          [4, 5, 6],
      ])
      print(arr)

      # Three-dimensional array (not the focus of these notes) --> TensorFlow
      arr3 = np.array([
          [[1, 2, 3],
           [4, 5, 6]],
          [[1, 2, 3],
           [4, 5, 6]],
      ])
      # Show the 3-D array itself rather than the 2-D `arr` defined just above.
      print(arr3)
      
      # Attributes of a NumPy array

      arr = np.array([
          [1, 2, 3],
          [4, 5, 6],
      ])

      # T: transpose --> rows and columns are swapped.
      # The '\n' separator puts the transposed array on its own line.
      print(arr, '\n', arr.T)

      # dtype: data type of the elements. The array is an ordinary Python
      # object, while int32/float64 are types that belong to NumPy.
      print(arr.dtype)
      # A purpose-built scientific calculator:
      # 11111111111111111111111111111111111111111
      # size: total number of elements
      print(arr.size)
      # ndim: number of dimensions
      print(arr.ndim)
      print(arr3.ndim)
      # shape: length of each dimension, as a tuple (rows first, then columns)
      print(arr.shape[0])
      print(arr.shape[1])
      # astype: convert the elements to another data type
      arr = arr.astype(np.float64)
      print(arr)
      
      # Slicing: a list takes a single slice, a 2-D array takes one per axis.
      lt = [1, 2, 3]
      print(lt[:])

      arr = np.array([[1, 2, 3], [4, 5, 6]])

      print(arr[:, :])  # rows, columns: everything
      print(arr[0, 0])  # element at row 0, column 0
      print(arr[0, :])  # the whole first row
      print(arr[:, -2:])  # the last two columns of every row

      # Boolean selection: keep only the elements greater than 4
      print(arr[arr > 4])

      # Assignment through a slice: lists first, arrays next
      lt = [1, 2, 3]
      lt[:] = [0, 0, 0]
      print(lt)

      arr = np.array([[1, 2, 3], [4, 5, 6]])

      # Overwrite a single element
      arr[0, 0] = 0
      print(arr)

      # Overwrite the first row; the scalar is written to every selected cell
      arr[0, :] = 0
      print(arr)

      # Overwrite the entire array
      arr[:, :] = 0
      print(arr)
      
      # Joining arrays

      arr1 = np.array([[1, 2, 3], [4, 5, 6]])

      # Mixing numbers and letters makes NumPy store every element of this
      # array as a string.
      arr2 = np.array([[7, 8, 9], ['a', 'b', 'c']])

      # The arrays to join are handed over together as one sequence argument.
      print(np.hstack((arr1, arr2)))  # side by side: columns are appended

      print(np.vstack((arr1, arr2)))  # on top of each other: rows are appended

      # axis=1 appends columns (like hstack); axis=0 would append rows (like vstack).
      print(np.concatenate((arr1, arr2), axis=1))
      
      # Building arrays with constructor functions

      print(np.ones((2, 3)))  # 2x3 array filled with ones

      print(np.zeros((2, 3)))  # 2x3 array filled with zeros

      print(np.eye(3, 3))  # ones on the main diagonal, zeros elsewhere

      print(np.linspace(1, 100, 10))  # 10 evenly spaced values from 1 to 100

      print(np.arange(2, 10))  # integers from 2 up to, but not including, 10

      arr1 = np.zeros((1, 12))
      print(arr1.reshape((3, 4)))  # same 12 elements, rearranged as 3 rows x 4 columns

      # Array arithmetic

      # Arithmetic operators such as + - * / act on every element.
      arr1 = 4 * np.ones((3, 4))
      print(arr1)

      # NumPy math functions are applied element by element as well.

      print(np.sin(arr1))
      
      # Matrix product

      arr1 = np.array([[1, 2, 3], [4, 5, 6]])

      arr2 = np.array([[1, 2], [4, 5], [6, 7]])
      # (2 x 3) times (3 x 2) gives a 2 x 2 result
      print(np.dot(arr1, arr2))

      # Matrix inverse
      arr = np.array([
          [1, 2, 3],
          [4, 5, 6],
          [9, 8, 9],
      ])
      print(np.linalg.inv(arr))

      # Math and statistics helpers: sum of the first row
      print(arr[0, :].sum())
      
      # Random numbers with numpy.random (important)
      print(np.random.rand(3, 4))  # shape given as separate arguments

      print(np.random.random((3, 4)))  # shape given as one tuple

      # Uncomment to make the draws from the global generator repeatable:
      # np.random.seed(1)
      print(np.random.random((3, 4)))

      # A separate generator object with its own fixed seed
      s = np.random.RandomState(1)
      print(s.random((3, 4)))

      arr = np.array([
          [1, 2, 3],
          [4, 5, 6],
          [9, 8, 9],
      ])
      np.random.shuffle(arr)  # reorders the rows of `arr` in place
      print(arr)

      # Pick from a one-dimensional sequence
      print(np.random.choice([1, 2, 3], 1))

      # Random integers from a given range, arranged as 3 rows x 4 columns
      print(np.random.randint(1, 100, (3, 4)))
      

    2. matplotlib模块

    • matplotlib模块的作用

      画图(画各种与数据相关的图)

    • 实例

      # 条形图
      # from matplotlib import pyplot as plt  # 约定俗成
      # from matplotlib.font_manager import FontProperties  # 修改字体
      #
      # font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
      #
      # plt.style.use('ggplot')  # 设置背景
      #
      # clas = ['3班', '4班', '5班', '6班']
      # students = [50, 55, 45, 60]
      # clas_index = range(len(clas))
      #
      # # [0,1,2,3] [50,55,45,60]
      # plt.bar(clas_index,students,color='darkblue')
      #
      # plt.xlabel('学生',fontproperties=font)
      # plt.ylabel('学生人数',fontproperties=font)
      # plt.title('班级-学生人数',fontproperties=font,fontsize=20,fontweight=25)
      # plt.xticks(clas_index,clas,fontproperties=font)
      #
      # plt.show()
      
      
      # # 直方图
      # import numpy as np
      # from matplotlib import pyplot as plt  # 约定俗成
      # from matplotlib.font_manager import FontProperties  # 修改字体
      #
      # font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
      #
      # plt.style.use('ggplot')
      #
      # x1 = np.random.randn(10000)
      #
      # x2 = np.random.randn(10000)
      #
      # fig = plt.figure()  # 生成一张画布
      # ax1 = fig.add_subplot(1, 2, 1)  # 1行2列取第一个
      # ax2 = fig.add_subplot(1, 2, 2)
      #
      # ax1.hist(x1, bins=50,color='darkblue')
      # ax2.hist(x2, bins=50,color='y')
      #
      # fig.suptitle('两个正态分布',fontproperties=font,fontsize=20)
      # ax1.set_title('x1的正态分布',fontproperties=font)  # 加子标题
      # ax2.set_title('x2的正态分布',fontproperties=font)
      # plt.show()
      
      
      # 折线图
      #
      # import numpy as np
      # from matplotlib import pyplot as plt  # 约定俗成
      # from matplotlib.font_manager import FontProperties  # 修改字体
      #
      # font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
      #
      # plt.style.use('ggplot')
      #
      # np.random.seed(10)
      # x1 = np.random.randn(40).cumsum()
      # x2 = np.random.randn(40).cumsum()
      # x3 = np.random.randn(40).cumsum()
      # x4 = np.random.randn(40).cumsum()
      #
      # plt.plot(x1, c='r', linestyle='-', marker='o', label='红圆线')
      # plt.plot(x2, color='y', linestyle='--', marker='*', label='黄虚线')
      # plt.plot(x3, color='b', linestyle='-.', marker='s', label='蓝方线')
      # plt.plot(x4, color='black', linestyle=':', marker='s', label='黑方线')
      # plt.legend(loc='best', prop=font)  # 显示label
      # plt.show()
      
      
      # Scatter plot + line plot, side by side on one figure
      import numpy as np
      from matplotlib import pyplot as plt  # conventional alias
      from matplotlib.font_manager import FontProperties  # custom font support

      # The font file is used for the Chinese titles and legend below.
      # Raw string so the Windows path separators are kept literally.
      font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

      plt.style.use('ggplot')  # plot style / background

      fig = plt.figure()  # one canvas
      ax1 = fig.add_subplot(1, 2, 1)  # 1 row, 2 columns, first slot
      ax2 = fig.add_subplot(1, 2, 2)  # 1 row, 2 columns, second slot

      # First data set: y = x squared
      x = np.arange(20)
      y = x ** 2

      # Second data set: y = x
      x2 = np.arange(20)
      y2 = x2

      # Left subplot: both data sets as scatter points
      ax1.scatter(x, y, c='r', label='红')
      ax1.scatter(x2, y2, c='b', label='蓝')

      # Right subplot: the same data sets as lines
      ax2.plot(x, y)
      ax2.plot(x2, y2)

      fig.suptitle('两张图', fontproperties=font, fontsize=15)
      ax1.set_title('散点图', fontproperties=font)
      ax2.set_title('折线图', fontproperties=font)
      ax1.legend(prop=font)  # show the labels given to scatter()
      plt.show()
      

    3. pandas模块

    • pandas模块的作用

      读写各种格式的数据文件(如 excel / json / sql / ini / csv 等)

    • 实例

      # import pandas as pd
      #
      # df = pd.read_csv('test.csv',header=None)
      # df.to_excel('test.xlsx')
      
      
      # Build a DataFrame, save it to Excel, read it back, then index into it
      import numpy as np
      import pandas as pd

      np.random.seed(10)  # fixed seed so the random values repeat between runs

      # Six month-end dates starting in January 2019 become the row labels
      index = pd.date_range('2019-01-01', periods=6, freq='M')
      print(index)
      columns = ['c1', 'c2', 'c3', 'c4']
      print(columns)
      val = np.random.randn(6, 4)  # 6 rows x 4 columns of random values
      print(val)

      df = pd.DataFrame(val, index=index, columns=columns)
      print(df)

      # Write the frame out to an Excel file
      df.to_excel('date_c.xlsx')

      # Read the file back, using the first column as the row index
      df = pd.read_excel('date_c.xlsx', index_col=[0])
      print(df)

      # The three parts of a DataFrame: row labels, column labels, raw values
      print(df.index)
      print(df.columns)
      print(df.values)

      # Select several columns by name
      print(df[['c1', 'c2']])

      # Select rows by index label (single label, then a label range)
      # print(df['2019-01-31'])
      print(df.loc['2019-01-31'])
      print(df.loc['2019-01-31':'2019-05-31'])

      # Select by integer position
      print(df)
      print(df.iloc[0, 0])

      # Overwrite the whole first row with zeros
      df.iloc[0, :] = 0
      
      
  • 相关阅读:
    2
    异常处理
    接口
    抽象与多态
    关联关系
    9-13
    数据类型转换
    Day3
    对象和类
    MyEclipse导入现成项目出现小红叉错误
  • 原文地址:https://www.cnblogs.com/Mcoming/p/11608546.html
Copyright © 2020-2023  润新知