• Implementing softmax in Python


    # Train a softmax classifier in Python on <鸢尾花数据.csv> (iris data, 3 classes):
    # 1) Implement the softmax algorithm by hand.
    # 2) Split the data into training and test sets (7:3): 70% train, 30% test.
    # 3) Plot the training loss curve.
    # 4) Compute the confusion matrix, accuracy, recall, precision, and F1.
    # References:
    # https://zhuanlan.zhihu.com/p/369936908?ivk_sa=1024320u
    # https://zhuanlan.zhihu.com/p/73558315
    # https://www.cnblogs.com/mtcnn/articles/9412567.html

    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix

    df=pd.read_csv('../鸢尾花数据.csv')
    train_set, test_set = train_test_split(df, test_size = 0.3)
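    # Note: train_test_split shuffles at random on every run; passing a fixed
    # random_state (e.g. random_state=42) would make the 7:3 split reproducible.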
    D = 4  # dimensionality: number of features
    K = 3  # number of classes
    cycle_run = 200  # number of training iterations


    X = np.array(train_set.values[:, :4])  # feature matrix (each row = one example)
    y = np.array(train_set.values[:, 4])   # class-label vector
    y = y.astype(int)  # labels must be int (or bool), otherwise the fancy indexing below fails

    # Visualize the data: both feature pairs overlaid on one axes, colored by class
    plt.scatter(X[:, 0], X[:, 1], s=40, c=y, alpha=0.5)
    plt.scatter(X[:, 2], X[:, 3], s=40, c=y, alpha=0.5)
    plt.show()

    # Train the softmax classifier
    # Parameter initialization
    W = 0.01 * np.random.randn(D, K)
    b = np.zeros((1, K))
    print('initial parameters:', W, b)
    # Hyperparameters
    step_size = 1e-0  # learning rate
    reg = 1e-3  # regularization strength


    # Gradient descent
    num_examples = X.shape[0]

    loss_all = []  # collect loss values for plotting
    for i in range(cycle_run):
        # Evaluate class scores, [N x K]
        scores = np.dot(X, W) + b
        # Compute class probabilities with softmax
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # [N x K]
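        # Note: a numerically safer, mathematically equivalent variant subtracts
        # the row-wise max before exponentiating, which avoids overflow in np.exp:
        # exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))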
        # Compute the loss: average cross-entropy plus L2 regularization
        correct_logprobs = -np.log(probs[range(num_examples), y])
        data_loss = np.sum(correct_logprobs) / num_examples
        reg_loss = 0.5 * reg * np.sum(W * W)
        loss = data_loss + reg_loss
        if i % 10 == 0:
            # Print and record the loss every ten iterations
            print("iteration %d: loss %f" % (i, loss))
            loss_all.append(loss)

        # Gradient of the loss with respect to the scores
        dscores = probs
        dscores[range(num_examples), y] -= 1
        dscores /= num_examples
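        # For reference: for softmax with cross-entropy loss, the gradient w.r.t.
        # the scores is probs - one_hot(y), averaged over the batch; that is
        # exactly what the three lines above compute.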

        # Backpropagate the gradient into the parameters (W, b)
        dW = np.dot(X.T, dscores)
        db = np.sum(dscores, axis=0, keepdims=True)

        dW += reg * W  # add the gradient of the regularization term

        # Parameter update
        W += -step_size * dW
        b += -step_size * db

    # Plot the loss curve (one point was recorded every 10 iterations)
    cycle_time = [m for m in range(int(cycle_run / 10))]
    plt.plot(cycle_time, loss_all)
    plt.show()

    # Evaluate accuracy on the training set
    scores = np.dot(X, W) + b
    predicted_class = np.argmax(scores, axis=1)
    print(predicted_class)
    print(y)
    print('training accuracy: %.2f' % (np.mean(predicted_class == y)))
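    # (Accuracy measured on the data the model was fit to is typically optimistic;
    # the held-out test accuracy below is the more meaningful number.)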

    # Test set
    X2 = np.array(test_set.values[:, :4])  # feature matrix (each row = one example)
    y2 = np.array(test_set.values[:, 4])   # class-label vector
    y2 = y2.astype(int)  # labels must be int (or bool) for indexing
    scores2 = np.dot(X2, W) + b
    predicted_class2 = np.argmax(scores2, axis=1)
    print('test accuracy: %.2f' % (np.mean(predicted_class2 == y2)))


    # Confusion matrix, accuracy, recall, precision, F1
    y_test = y2
    y_log_predict = predicted_class2

    cm = confusion_matrix(y_test, y_log_predict)
    FP = cm.sum(axis=0) - np.diag(cm)
    FN = cm.sum(axis=1) - np.diag(cm)
    TP = np.diag(cm)
    TN = cm.sum() - (FP + FN + TP)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP/(TP+FN)
    # Specificity or true negative rate
    TNR = TN/(TN+FP)
    # Precision or positive predictive value
    PPV = TP/(TP+FP)
    # Negative predictive value
    NPV = TN/(TN+FN)
    # Fall out or false positive rate
    FPR = FP/(FP+TN)
    # False negative rate
    FNR = FN/(TP+FN)
    # False discovery rate
    FDR = FP/(TP+FP)
    acc = (TP + TN) / (TP + TN + FN + FP)  # accuracy
    precision = TP / (TP + FP)  # precision (same as PPV above)
    recall = TP / (TP + FN)     # recall (same as TPR above)
    f1 = 2 * (precision * recall / (precision + recall))  # F1 score
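    # Note: TP, FP, FN, TN are per-class arrays here (one-vs-rest), so every
    # metric below is a length-K array with one value per class; a single
    # macro-averaged score would be e.g. precision.mean().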
    print('confusion matrix:', cm)
    print('accuracy:', acc)
    print('precision:', precision)
    print('recall:', recall)
    print('F1:', f1)
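
As a sanity check, the hand-computed metrics above can be compared against scikit-learn's built-in implementations. A minimal sketch, reusing the y_test and y_log_predict variables from the script above:

    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

    # average=None returns one value per class, matching the manual
    # one-vs-rest TP/FP/FN computations above
    p, r, f, _ = precision_recall_fscore_support(y_test, y_log_predict, average=None)
    print('sklearn precision:', p)
    print('sklearn recall:', r)
    print('sklearn F1:', f)
    # Overall accuracy (a single score, unlike the per-class
    # (TP+TN)/total values computed above)
    print('sklearn accuracy:', accuracy_score(y_test, y_log_predict))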