1.手写数字数据集
- from sklearn.datasets import load_digits
- digits = load_digits()
# Load the handwritten-digits dataset that ships with scikit-learn.
from sklearn.datasets import load_digits

digits = load_digits()
2.图片数据预处理
- x:归一化MinMaxScaler()
- y:独热编码OneHotEncoder()或to_categorical
- 训练集测试集划分
- 张量结构
# Preprocessing: scale the features, one-hot encode the labels, then split.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# x: min-max normalisation
x_data = digits.data.astype(np.float32)  # cast the pixel values to float32
scaler = MinMaxScaler()  # rescales every feature into a fixed range (0-1 by default)
X_data = scaler.fit_transform(x_data)
# Reshape into image tensors: -1 = sample count, 8x8 = image size, 1 = channels.
x = X_data.reshape(-1, 8, 8, 1)

# y: one-hot encoding
y_data = digits.target.astype(np.float32).reshape(-1, 1)  # labels as one column
encoder = OneHotEncoder()
# NOTE: .todense() yields an np.matrix (not an ndarray); the later evaluation
# snippets index the result accordingly.
y = encoder.fit_transform(y_data).todense()

# Hold out 20% of the samples as the test set, stratified on the labels.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0, stratify=y
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
截图:
(训练集和测试集划分的结果)
3.设计卷积神经网络结构
- 绘制模型结构图,并说明设计依据。
依据:使用 Dropout 层防止过拟合;使用 Flatten 层将卷积特征图展平(平铺)后接入全连接层。
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from tensorflow.keras.models import Sequential

ks = [3, 3]  # convolution kernel size

# Build the CNN as one layer list. Only the first layer needs an explicit
# input shape; the framework infers the shapes of all later layers.
model = Sequential([
    # Block 1: convolution -> pooling -> dropout
    Conv2D(filters=16, kernel_size=ks, padding='same',
           input_shape=x_train.shape[1:], activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),  # drop a quarter of the connections to curb overfitting
    # Block 2
    Conv2D(filters=32, kernel_size=ks, padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Block 3: two stacked convolutions before pooling
    Conv2D(filters=64, kernel_size=ks, padding='same', activation='relu'),
    Conv2D(filters=128, kernel_size=ks, padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head
    Flatten(),                        # flatten the feature maps
    Dense(128, activation='relu'),    # fully connected layer
    Dropout(0.25),
    Dense(10, activation='softmax'),  # 10 output classes
])

# Print the model structure.
model.summary()
截图:(结构图)
4.模型训练
import matplotlib.pyplot as plt
import numpy as np

# Compile with the Adam optimizer and categorical cross-entropy loss.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train in batches of 128 samples; 20% of the training data is used for validation.
train_history = model.fit(x=x_train, y=y_train, validation_split=0.2,
                          batch_size=128, epochs=10, verbose=2)

# Evaluate on the held-out test set.
score = model.evaluate(x_test, y_test)

# Predicted class labels. Sequential.predict_classes() no longer exists in
# current TensorFlow releases, so take the argmax of the softmax output.
y_pred = np.argmax(model.predict(x_test), axis=-1)
print(y_pred)


def show_train_history(train_histoty, train, validataion):
    """Plot a training metric and its validation counterpart per epoch.

    Parameter names keep their original spelling so existing keyword calls
    continue to work.

    Args:
        train_histoty: History object returned by ``model.fit``.
        train: key of the training metric in ``history`` (e.g. 'accuracy').
        validataion: key of the validation metric (e.g. 'val_accuracy').
    """
    # Use the argument rather than the module-level ``train_history`` so the
    # function plots whatever history the caller passes in.
    plt.plot(train_histoty.history[train])
    plt.plot(train_histoty.history[validataion])
    plt.title('Train History')
    plt.ylabel(train)  # label the axis with the metric actually plotted
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# Accuracy curves
show_train_history(train_history, 'accuracy', 'val_accuracy')
# Loss curves
show_train_history(train_history, 'loss', 'val_loss')
截图:
(数据处理)
(数据评估,和 预测)
(准确率可视化)
(损失率可视化)
5.模型评价
- model.evaluate()
- 交叉表与交叉矩阵
- pandas.crosstab
- seaborn.heatmap
import numpy as np
import pandas as pd

# Method 1: model evaluation (index 1 is the accuracy metric).
model.evaluate(x_test, y_test)[1]

# Method 2: predicted labels. Sequential.predict_classes() no longer exists in
# current TensorFlow releases, so take the argmax of the softmax output.
y_pre = np.argmax(model.predict(x_test), axis=-1)
y_pre[:10]

# Method 3: cross-tabulate the true labels against the predictions.
y_test1 = np.argmax(y_test, axis=1).reshape(-1)
# Flatten to a plain 1-D array. This works whether y_test is an np.matrix
# (argmax then yields a 1 x n matrix) or an ordinary ndarray; indexing with
# [0] would return a single scalar in the ndarray case.
y_true = np.asarray(y_test1).ravel()
pd.crosstab(y_true, y_pred, rownames=['true'], colnames=['predict'])
# Cross-tabulation and confusion-matrix heat map
import numpy as np
import seaborn as sns
import pandas as pd

# Flatten the true labels to a plain list. This handles both a 1 x n
# np.matrix and a 1-D ndarray (tolist()[0] would yield a scalar for the latter).
y_test1 = np.asarray(y_test1).ravel().tolist()
a = pd.crosstab(np.array(y_test1), y_pred)
df = pd.DataFrame(a)  # crosstab already returns a DataFrame; kept for the `df` name
# Single-letter Matplotlib colours must be lowercase ('g'); 'G' is rejected
# by current Matplotlib versions.
sns.heatmap(df, annot=True, cmap='Reds', linewidths=0.2, linecolor='g')  # heat map
截图:
(模型评估 和 预测值)
(交叉表查看预测数据与原数据对比)
(交叉表与交叉矩阵的可视化——热图)
全部代码
# 1. Handwritten-digits dataset
from sklearn.datasets import load_digits

digits = load_digits()

# 2. Image data preprocessing
# x: min-max normalisation
from sklearn.preprocessing import MinMaxScaler
import numpy as np

x_data = digits.data.astype(np.float32)  # cast the pixel values to float32
# Rescale every feature into a fixed range (0-1 by default).
scaler = MinMaxScaler()
X_data = scaler.fit_transform(x_data)
# Reshape into image tensors: -1 = sample count, 8x8 = image size, 1 = channels.
x = X_data.reshape(-1, 8, 8, 1)

# y: one-hot encoding
from sklearn.preprocessing import OneHotEncoder

y_data = digits.target.astype(np.float32).reshape(-1, 1)  # labels as one column
# Use .toarray() (ndarray) rather than .todense() (np.matrix): recent
# scikit-learn versions reject np.matrix input, and ndarray indexing is what
# the evaluation code below expects.
y = OneHotEncoder().fit_transform(y_data).toarray()

# Train/test split: hold out 20%, stratified on the labels.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0, stratify=y
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# 3. Design the convolutional neural network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D

# Build the model.
model = Sequential()
ks = [3, 3]  # convolution kernel size
# Only the first layer needs an explicit input shape; the framework infers
# the shapes of all later layers.
model.add(Conv2D(filters=16, kernel_size=ks, padding='same',
                 input_shape=x_train.shape[1:], activation='relu'))  # convolution
model.add(MaxPool2D(pool_size=(2, 2)))  # pooling
model.add(Dropout(0.25))  # drop a quarter of the connections to curb overfitting
model.add(Conv2D(filters=32, kernel_size=ks, padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=ks, padding='same', activation='relu'))
model.add(Conv2D(filters=128, kernel_size=ks, padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())  # flatten the feature maps
model.add(Dense(128, activation='relu'))  # fully connected layer
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax'))  # 10 output classes
# Print the model structure.
model.summary()

# 4. Model training
import matplotlib.pyplot as plt

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train in batches of 128 samples; 20% of the training data is used for validation.
train_history = model.fit(x=x_train, y=y_train, validation_split=0.2,
                          batch_size=128, epochs=10, verbose=2)
score = model.evaluate(x_test, y_test)  # evaluate on the test set

# Predicted class labels. Sequential.predict_classes() no longer exists in
# current TensorFlow releases, so take the argmax of the softmax output.
y_pred = np.argmax(model.predict(x_test), axis=-1)
print(y_pred)


def show_train_history(train_histoty, train, validataion):
    """Plot a training metric and its validation counterpart per epoch.

    Parameter names keep their original spelling so existing keyword calls
    continue to work.

    Args:
        train_histoty: History object returned by ``model.fit``.
        train: key of the training metric in ``history`` (e.g. 'accuracy').
        validataion: key of the validation metric (e.g. 'val_accuracy').
    """
    # Use the argument rather than the module-level ``train_history`` so the
    # function plots whatever history the caller passes in.
    plt.plot(train_histoty.history[train])
    plt.plot(train_histoty.history[validataion])
    plt.title('Train History')
    plt.ylabel(train)  # label the axis with the metric actually plotted
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# Accuracy curves
show_train_history(train_history, 'accuracy', 'val_accuracy')
# Loss curves
show_train_history(train_history, 'loss', 'val_loss')

# 5. Model evaluation
# Method 1: model evaluation (index 1 is the accuracy metric).
model.evaluate(x_test, y_test)[1]
# Method 2: predicted labels.
y_pre = np.argmax(model.predict(x_test), axis=-1)
y_pre[:10]
# Method 3: cross-tabulate the true labels against the predictions.
y_test1 = np.argmax(y_test, axis=1).reshape(-1)  # 1-D array of true labels
y_true = np.asarray(y_test1).ravel()
import pandas as pd

pd.crosstab(y_true, y_pred, rownames=['true'], colnames=['predict'])

# Cross-tabulation and confusion-matrix heat map
import seaborn as sns

y_test1 = np.asarray(y_test1).ravel().tolist()
a = pd.crosstab(np.array(y_test1), y_pred)
df = pd.DataFrame(a)  # crosstab already returns a DataFrame; kept for the `df` name
# Single-letter Matplotlib colours must be lowercase ('g'); 'G' is rejected
# by current Matplotlib versions.
sns.heatmap(df, annot=True, cmap='Reds', linewidths=0.2, linecolor='g')  # heat map
# To save the figure: plt.savefig('venv/data/hot.png')