Using the GPU
# Restrict this process to GPUs 4-7; must be set before TensorFlow initializes
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"

import tensorflow as tf
from keras import backend as K

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap each process at 50% of GPU memory
config.gpu_options.allow_growth = True                    # allocate memory on demand instead of all up front
K.set_session(tf.Session(config=config))                  # install the configured session as the Keras backend session
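A quick way to confirm the mask took effect (a minimal sketch using the TF1-era device_lib utility; not part of the original notes):

from tensorflow.python.client import device_lib
# Only the four masked GPUs should appear, renumbered as /gpu:0 .. /gpu:3 inside this process
print([d.name for d in device_lib.list_local_devices() if d.device_type == "GPU"])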
DNN
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from keras.losses import binary_crossentropy

def baseline_model():
    model = Sequential()
    model.add(Dense(16, input_shape=(21,), activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))  # two output units, one per class (labels are one-hot encoded)
    model.compile(optimizer=RMSprop(lr=0.01), loss=binary_crossentropy, metrics=['accuracy'])
    return model
from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler

def cross_validation(X, new_y, num_feat):
    print("X=", X[:10])
    print("X.values=", X.values[:10])
    # print("y=", new_y[:10])
    # X = X.values
    y = to_categorical(new_y)  # one-hot encode the labels for the 2-unit output layer
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=666)
    # Fit the scaler on the training split only, then apply it to both splits
    standScaler = StandardScaler()
    standScaler.fit(X_train)
    X_train = standScaler.transform(X_train)
    X_test = standScaler.transform(X_test)
    estimator = KerasClassifier(build_fn=baseline_model, epochs=10, batch_size=1, verbose=1)
    kfold = KFold(n_splits=5, shuffle=True, random_state=999)
    scores = cross_val_score(estimator, X_train, y_train, cv=kfold)
    print("Accuracy of cross validation, mean %.2f, std %.2f" % (scores.mean(), scores.std()))
    # Alternative classifiers tried earlier:
    # clf = LogisticRegression(penalty='l2', solver='liblinear', class_weight='balanced')
    # clf = KNeighborsClassifier(weights="distance", n_neighbors=10, p=9)
    # clf = svm.SVC(kernel='rbf', C=2e4, gamma=2e-5)
    # clf = svm.SVC(kernel='linear', C=2e3)
    # clf = RandomForestClassifier(n_estimators=1000, class_weight="balanced")
    # clf = GaussianNB()
    # scores = cross_val_score(clf, X, new_y, cv=10)
    return scores
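A minimal usage sketch; the feature matrix and labels here are hypothetical placeholders, with 21 columns to match the input_shape=(21,) used by baseline_model:

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.rand(200, 21))
new_y = np.random.randint(0, 2, size=200)
scores = cross_validation(X, new_y, num_feat=21)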
Plotting accuracy and loss curves
import matplotlib.pyplot as plt

def show_acc(history):
    plt.clf()
    history_dict = history.history
    # The key names depend on the metric passed to compile(); with
    # metrics=['accuracy'], older Keras versions log 'acc' / 'val_acc'
    acc = history_dict['acc']
    val_acc = history_dict['val_acc']
    epochs = range(1, len(val_acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.xlabel('Epochs')
    plt.ylabel('Acc')
    plt.legend()
    plt.show()
def show_loss(history):
    plt.clf()
    history_dict = history.history
    print("print history.history = ", history_dict)  # debug: inspect the logged keys
    loss = history_dict['loss']
    val_loss = history_dict['val_loss']
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
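Typical usage, assuming a compiled model and standardized training data as in the snippets above:

history = model.fit(X_train, y_train, epochs=20, batch_size=1, validation_split=0.2)
show_acc(history)   # accuracy curves for training vs. validation
show_loss(history)  # loss curves for training vs. validation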
TensorBoard
import keras
from keras.utils import plot_model

def classify_data(X, y, class_names):
    y = to_categorical(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=666)
    # Standardize: fit on the training split, transform both splits
    standScaler = StandardScaler()
    standScaler.fit(X_train)
    X_train = standScaler.transform(X_train)
    X_test = standScaler.transform(X_test)
    model = Sequential()
    model.add(Dense(16, input_shape=(21,), activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.summary()
    model.compile(optimizer=RMSprop(lr=0.001), loss=binary_crossentropy, metrics=['accuracy'])
    plot_model(model, show_shapes=True, to_file='model.png')
    callbacks = [keras.callbacks.TensorBoard(log_dir="my_log_dir",
                                             histogram_freq=1,
                                             embeddings_freq=1,
                                             embeddings_data=X[:20].astype("float32"))]
    history = model.fit(X_train, y_train, epochs=20, batch_size=1,
                        validation_split=0.2, callbacks=callbacks)
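With the callback writing to my_log_dir, the dashboard is served by the standard TensorBoard CLI:

tensorboard --logdir=my_log_dir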
Plotting the confusion matrix
import itertools
import numpy as np

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)  # renders the matrix as an image; plt.show() is still needed to display it
    plt.title(title)
    plt.colorbar()  # show the color scale
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)  # x-axis tick labels
    plt.yticks(tick_marks, classes)               # y-axis tick labels
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    # itertools.product pairs every row index with every column index.
    # Row i holds the true label and column j the predicted label,
    # i.e. x is the prediction and y is the ground truth.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label', fontsize=14)
    plt.xlabel('Predicted label', fontsize=14)
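Typical usage with scikit-learn, assuming y_test and y_pred from one of the classifiers below:

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cm, classes=class_names, normalize=True)
plt.show()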
from sklearn import svm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def classify_data(X, y, class_names):
    # Split into training and test sets; testSize is assumed to be a module-level constant (e.g. 0.2)
    global f_cv_scores
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize, random_state=42)
    # Alternative classifiers tried earlier:
    # clf = LogisticRegression(penalty='l2', class_weight='balanced')
    # clf = RandomForestClassifier(n_estimators=1000, class_weight="balanced")
    # clf = KNeighborsClassifier(weights="distance", n_neighbors=10, p=9)
    # clf = svm.SVC(kernel='rbf', C=2e4, gamma=2e-5)
    clf = svm.SVC(kernel='linear', C=2e3)
    # clf = GaussianNB()
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    # Note: the three scores use different averaging schemes (macro / micro / weighted)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='weighted')
    acc = accuracy_score(y_test, y_pred)
    return precision, recall, f1, acc
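A usage sketch; the feature matrix, labels, and class names here are hypothetical placeholders:

import numpy as np

testSize = 0.2  # module-level constant referenced inside classify_data
X = np.random.rand(200, 21)
new_y = np.random.randint(0, 2, size=200)
precision, recall, f1, acc = classify_data(X, new_y, class_names=["negative", "positive"])
print("precision=%.3f recall=%.3f f1=%.3f acc=%.3f" % (precision, recall, f1, acc))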
Keras training, test, and validation sets
# Split off the test set first
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
# Hold out 10% of the training set as the validation set
history = model.fit(X_train, y_train, epochs=20, batch_size=1, shuffle=True,
                    validation_split=0.1, verbose=1, callbacks=None, validation_data=None)
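When validation_data is given, it overrides validation_split; a minimal sketch of the explicit-split alternative:

# Carve an explicit validation set out of the training data
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=666)
history = model.fit(X_tr, y_tr, epochs=20, batch_size=1, shuffle=True,
                    validation_data=(X_val, y_val))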