1. numpy模块
-
numpy模块的作用
用来做数据分析:对 numpy 数组(多维数组;二维数组既有行又有列,即矩阵)进行科学计算
-
实例
# NumPy walkthrough: why arrays beat plain lists, then array creation,
# attributes, slicing, assignment, merging, factory functions, arithmetic,
# linear algebra and random numbers. Every step prints its result.
import numpy as np  # conventional alias

# --- Motivation: element-wise product, plain lists vs. NumPy -------------
lt1 = [1, 2, 3]  # n elements
lt2 = [4, 5, 6]
# One pass over both lists, O(n); zip pairs the elements up by position.
lt = [a * b for a, b in zip(lt1, lt2)]
print(lt)

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
print(arr1 * arr2)  # same element-wise product without an explicit loop
# Original note: GPU --> graphics card.

# --- Creating NumPy arrays (arrays are mutable) --------------------------
# 1-D array (not the focus of these notes)
arr = np.array([1, 2, 4])
print(type(arr), arr)

# 2-D array (the important case)
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr)

# 3-D array (not the focus of these notes; relevant for TensorFlow)
arr3 = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
print(arr3)  # show the 3-D array that was just built

# --- Array attributes ----------------------------------------------------
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# T: transpose (rows and columns swapped)
print(arr, ' ', arr.T)

# dtype: element type. The array itself is a Python object, while
# int32/float64 are NumPy's own element types.
print(arr.dtype)

# Original note: "a customized scientific calculator", followed by
# 11111111111111111111111111111111111111111
# (presumably illustrating a long fixed-width number -- TODO confirm).

# size: total number of elements
print(arr.size)

# ndim: number of dimensions
print(arr.ndim)
print(arr3.ndim)

# shape: length of each dimension, as a tuple
print(arr.shape[0])
print(arr.shape[1])

# astype: convert the element type (returns a new array)
arr = arr.astype(np.float64)
print(arr)

# --- Slicing -------------------------------------------------------------
lt = [1, 2, 3]
print(lt[:])

arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr[:, :])  # [rows, columns]
print(arr[0, 0])
print(arr[0, :])
print(arr[:, -2:])

# Boolean-mask selection
print(arr[arr > 4])

# --- Assignment ----------------------------------------------------------
lt = [1, 2, 3]
lt[:] = [0, 0, 0]
print(lt)

arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr[0, 0] = 0
print(arr)
arr[0, :] = 0
print(arr)
arr[:, :] = 0
print(arr)

# --- Merging arrays ------------------------------------------------------
arr1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# Mixing ints and strings makes NumPy store every element as a string.
arr2 = np.array([
    [7, 8, 9],
    ['a', 'b', 'c'],
])
# The arrays to merge are passed as a single sequence (a tuple here).
print(np.hstack((arr1, arr2)))
print(np.vstack((arr1, arr2)))
# axis=0 (the default) stacks rows like vstack;
# axis=1 joins columns side by side like hstack.
print(np.concatenate((arr1, arr2), axis=1))

# --- Creating arrays with factory functions ------------------------------
print(np.ones((2, 3)))
print(np.zeros((2, 3)))
print(np.eye(3, 3))
print(np.linspace(1, 100, 10))
print(np.arange(2, 10))

arr1 = np.zeros((1, 12))
print(arr1.reshape((3, 4)))  # same 12 elements, new shape

# --- Array arithmetic: + - * / apply element-wise ------------------------
arr1 = np.ones((3, 4)) * 4
print(arr1)

# Universal functions also apply element-wise
print(np.sin(arr1))

# --- Matrix multiplication (dot product): (2x3) . (3x2) -> (2x2) ---------
arr1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr2 = np.array([
    [1, 2],
    [4, 5],
    [6, 7],
])
print(np.dot(arr1, arr2))

# --- Matrix inverse ------------------------------------------------------
arr = np.array([[1, 2, 3], [4, 5, 6], [9, 8, 9]])
print(np.linalg.inv(arr))

# --- Math / statistics helpers -------------------------------------------
print(np.sum(arr[0, :]))

# --- numpy.random: random numbers (important) ----------------------------
print(np.random.rand(3, 4))
print(np.random.random((3, 4)))

# Uncomment to make the global generator reproducible:
# np.random.seed(1)
print(np.random.random((3, 4)))

# A RandomState carries its own seed, independent of the global generator.
s = np.random.RandomState(1)
print(s.random((3, 4)))

arr = np.array([[1, 2, 3], [4, 5, 6], [9, 8, 9]])
np.random.shuffle(arr)  # shuffles in place
print(arr)

# choice draws from a 1-D sequence
print(np.random.choice([1, 2, 3], 1))

# randint draws integers from a range, here into a 3x4 array
print(np.random.randint(1, 100, (3, 4)))
2. matplotlib模块
-
matplotlib模块的作用
画图(画各种与数据相关的图)
-
实例
# Matplotlib walkthrough. Four examples: bar chart, histogram, line chart,
# and scatter + line plot. The first three are kept commented out so that
# running this script shows only the last figure; uncomment one to try it.

# --- Bar chart (disabled) ------------------------------------------------
# from matplotlib import pyplot as plt  # conventional alias
# from matplotlib.font_manager import FontProperties  # custom font
#
# font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
#
# plt.style.use('ggplot')  # plot style / background
#
# clas = ['3班', '4班', '5班', '6班']
# students = [50, 55, 45, 60]
# clas_index = range(len(clas))
#
# # x positions [0,1,2,3], bar heights [50,55,45,60]
# plt.bar(clas_index,students,color='darkblue')
#
# plt.xlabel('学生',fontproperties=font)
# plt.ylabel('学生人数',fontproperties=font)
# plt.title('班级-学生人数',fontproperties=font,fontsize=20,fontweight=25)
# plt.xticks(clas_index,clas,fontproperties=font)
#
# plt.show()

# --- Histogram (disabled) ------------------------------------------------
# import numpy as np
# from matplotlib import pyplot as plt  # conventional alias
# from matplotlib.font_manager import FontProperties  # custom font
#
# font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
#
# plt.style.use('ggplot')
#
# x1 = np.random.randn(10000)
#
# x2 = np.random.randn(10000)
#
# fig = plt.figure()  # create a figure (the canvas)
# ax1 = fig.add_subplot(1, 2, 1)  # 1 row, 2 columns, first slot
# ax2 = fig.add_subplot(1, 2, 2)
#
# ax1.hist(x1, bins=50,color='darkblue')
# ax2.hist(x2, bins=50,color='y')
#
# fig.suptitle('两个正太分布',fontproperties=font,fontsize=20)
# ax1.set_title('x1的正太分布',fontproperties=font)  # per-subplot title
# ax2.set_title('x2的正太分布',fontproperties=font)
# plt.show()

# --- Line chart (disabled) -----------------------------------------------
# import numpy as np
# from matplotlib import pyplot as plt  # conventional alias
# from matplotlib.font_manager import FontProperties  # custom font
#
# font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
#
# plt.style.use('ggplot')
#
# np.random.seed(10)
# x1 = np.random.randn(40).cumsum()
# x2 = np.random.randn(40).cumsum()
# x3 = np.random.randn(40).cumsum()
# x4 = np.random.randn(40).cumsum()
#
# plt.plot(x1, c='r', linestyle='-', marker='o', label='红圆线')
# plt.plot(x2, color='y', linestyle='--', marker='*', label='黄虚线')
# plt.plot(x3, color='b', linestyle='-.', marker='s', label='蓝方线')
# plt.plot(x4, color='black', linestyle=':', marker='s', label='黑方线')
# plt.legend(loc='best', prop=font)  # show the labels
# plt.show()

# --- Scatter plot + line plot (active) -----------------------------------
import numpy as np
from matplotlib import pyplot as plt  # conventional alias
from matplotlib.font_manager import FontProperties  # custom font

# Font file used for the Chinese titles and legend below. Raw string so the
# backslashes in the Windows path are kept literally.
# NOTE: this path is Windows-specific; point it at another font file on
# other systems.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

plt.style.use('ggplot')

# One figure with two side-by-side subplots.
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# Two data series: y = x**2 and y = x, for x = 0..19.
x = np.arange(20)
y = x ** 2

x2 = np.arange(20)
y2 = x2

# Left subplot: both series as scatter points.
ax1.scatter(x, y, c='r', label='红')
ax1.scatter(x2, y2, c='b', label='蓝')

# Right subplot: the same series as lines.
ax2.plot(x, y)
ax2.plot(x2, y2)

fig.suptitle('两张图', fontproperties=font, fontsize=15)
ax1.set_title('散点图', fontproperties=font)
ax2.set_title('折线图', fontproperties=font)
ax1.legend(prop=font)
plt.show()
3. pandas模块
-
pandas模块的作用
操作各种表格/数据文件(如 excel / json / sql / ini / csv 等)
-
实例
# pandas walkthrough: build a DataFrame with a date index, round-trip it
# through an Excel file, then select and assign by label and by position.

# --- CSV -> Excel conversion (disabled) ----------------------------------
# import pandas as pd
#
# df = pd.read_csv('test.csv',header=None)
# df.to_excel('test.xls')

# --- DataFrame basics and Excel round-trip -------------------------------
import numpy as np
import pandas as pd

np.random.seed(10)  # reproducible random values

# Six month-end dates starting from 2019-01.
# NOTE: pandas 2.2+ deprecates the 'M' alias in favour of 'ME'; 'M' is kept
# here so the script still runs on older pandas versions.
index = pd.date_range('2019-01-01', periods=6, freq='M')
print(index)

columns = ['c1', 'c2', 'c3', 'c4']
print(columns)

val = np.random.randn(6, 4)
print(val)

df = pd.DataFrame(index=index, columns=columns, data=val)
print(df)

# Save the DataFrame to an Excel file
df.to_excel('date_c.xlsx')

# Read the file back, using the first column as the index
df = pd.read_excel('date_c.xlsx', index_col=[0])
print(df)

print(df.index)
print(df.columns)
print(df.values)

# Select several columns by name
print(df[['c1', 'c2']])

# Select rows by index label. Plain df[...] looks up a column, so the line
# below is left disabled and .loc is used instead.
# print(df['2019-01-31'])
print(df.loc['2019-01-31'])
print(df.loc['2019-01-31':'2019-05-31'])

# Select by integer position
print(df)
print(df.iloc[0, 0])

# Overwrite the whole first row
df.iloc[0, :] = 0