• Building a bank risk-control (loan default) model


    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    import seaborn as sns
    from sklearn.metrics import confusion_matrix
    from matplotlib import pyplot as plt
    from sklearn import tree
    from sklearn.metrics import accuracy_score
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    from sklearn import svm
    
    # Read the data
    data_load = "bankloan.xls"
    data = pd.read_excel(data_load)
    
    data.describe()
    
    |       | 年龄      | 教育     | 工龄     | 地址     | 收入      | 负债率   | 信用卡负债 | 其他负债 | 违约     |
    |-------|-----------|----------|----------|----------|-----------|----------|------------|----------|----------|
    | count | 700       | 700      | 700      | 700      | 700       | 700      | 700        | 700      | 700      |
    | mean  | 34.86     | 1.722857 | 8.388571 | 8.278571 | 45.601429 | 10.260571| 1.553553   | 3.058209 | 0.261429 |
    | std   | 7.997342  | 0.928206 | 6.658039 | 6.824877 | 36.814226 | 6.827234 | 2.117197   | 3.287555 | 0.439727 |
    | min   | 20        | 1        | 0        | 0        | 14        | 0.4      | 0.011696   | 0.045584 | 0        |
    | 25%   | 29        | 1        | 3        | 3        | 24        | 5        | 0.369059   | 1.044178 | 0        |
    | 50%   | 34        | 1        | 7        | 7        | 34        | 8.6      | 0.854869   | 1.987568 | 0        |
    | 75%   | 40        | 2        | 12       | 12       | 55        | 14.125   | 1.901955   | 3.923065 | 1        |
    | max   | 56        | 5        | 31       | 34       | 446       | 41.3     | 20.561310  | 27.033600| 1        |
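
    The 违约 column (1 = default) is the label; its mean of about 0.26 above says roughly a quarter of the 700 customers defaulted. A one-line check of the class balance (a minimal sketch; the resulting counts are not part of the original output):

    data['违约'].value_counts(normalize=True)   # fraction of non-defaulters (0) vs. defaulters (1)
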
    data.columns
    
    Index(['年龄', '教育', '工龄', '地址', '收入', '负债率', '信用卡负债', '其他负债', '违约'], dtype='object')
    
    data.index
    
    RangeIndex(start=0, stop=700, step=1)
    
    ## Convert to NumPy arrays and split the data
    
    X = np.array(data.iloc[:,0:-1])
    y = np.array(data.iloc[:,-1])
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, train_size=0.8, test_size=0.2, shuffle=True)
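
    Since only about 26% of the labels are 1 (default), a stratified split keeps that ratio identical in the train and test sets. A hedged variant of the call above (stratify is a standard train_test_split parameter; it is not used in the original run):

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, train_size=0.8, test_size=0.2, shuffle=True, stratify=y)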
    

    Decision tree

    Dtree = DecisionTreeClassifier(max_leaf_nodes=3,random_state=13)
    Dtree.fit(X_train, y_train)
    y_pred = Dtree.predict(X_test)
    accuracy_score(y_test, y_pred)
    
    0.7285714285714285
    
    tree.plot_tree(Dtree)
    plt.show()
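
    With max_leaf_nodes=3 the fitted tree is small enough to read as plain text as well. A short sketch using sklearn's export_text with the original column names as feature names (assuming the same fitted Dtree):

    from sklearn.tree import export_text
    # Print the learned split rules; the features are every column except 违约
    print(export_text(Dtree, feature_names=list(data.columns[:-1])))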
    

    ## Confusion matrix (seaborn heatmap)
    
    cm = confusion_matrix(y_test, y_pred)
    heatmap = sns.heatmap(cm, annot=True, fmt='d')
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')
    plt.ylabel("true label")
    plt.xlabel("predicted label")
    plt.show()
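
    With about 26% defaulters, accuracy alone can hide a model that rarely flags the positive class, so the per-class precision and recall are worth printing too. A minimal sketch using sklearn's classification_report on the same predictions:

    from sklearn.metrics import classification_report
    # Class 1 corresponds to 违约 (default)
    print(classification_report(y_test, y_pred, target_names=['no default', 'default']))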
    

    SVM

    svc = svm.SVC()                 # use a new name so the imported svm module is not shadowed
    svc.fit(X_train, y_train)       # fit on the training split
    y_pred = svc.predict(X_test)
    accuracy_score(y_test, y_pred)
    
    0.7857142857142857
    
    cm = confusion_matrix(y_test, y_pred)
    heatmap = sns.heatmap(cm, annot=True, fmt='d')
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')
    plt.ylabel("true label")
    plt.xlabel("predicted label")
    plt.show()
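
    An RBF-kernel SVC is sensitive to feature scale, and 收入 ranges from 14 to 446 while 负债率 stays in single digits, so standardizing the inputs usually helps. A hedged sketch using a sklearn pipeline (not part of the original code; its score may differ from the one above):

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    # Standardize each feature to zero mean / unit variance before the SVM
    svc_scaled = make_pipeline(StandardScaler(), svm.SVC())
    svc_scaled.fit(X_train, y_train)
    accuracy_score(y_test, svc_scaled.predict(X_test))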
    

    Ensemble (voting classifier)

    # Ensemble: hard voting over five base classifiers
    clf1 = LogisticRegression(max_iter=1000, random_state=1)   # binary target; max_iter raised so lbfgs converges on unscaled features
    clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
    clf3 = GaussianNB()
    clf4 = svm.SVC()
    clf5 = DecisionTreeClassifier(max_leaf_nodes=3,random_state=13)
    eclf = VotingClassifier(estimators=[
            ('lr', clf1), ('rf', clf2), ('gnb', clf3), ('svm', clf4), ('dtree', clf5)], voting='hard')
    eclf = eclf.fit(X_train, y_train)   # fit on the training split so the test set stays unseen
    y_pred = eclf.predict(X_test)
    accuracy_score(y_test, y_pred)
    
    0.8357142857142857
    
    cm = confusion_matrix(y_test, y_pred)
    heatmap = sns.heatmap(cm, annot=True, fmt='d')
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')
    plt.ylabel("true label")
    plt.xlabel("predicted label")
    plt.show()
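
    Hard voting simply takes the majority class across the five estimators. To check whether the ensemble actually beats its members, each model can be cross-validated on the training split; the sketch below follows the pattern from the sklearn VotingClassifier example (5-fold accuracy, assuming the clf1–clf5 and eclf defined above):

    from sklearn.model_selection import cross_val_score

    for label, clf in [('lr', clf1), ('rf', clf2), ('gnb', clf3),
                       ('svm', clf4), ('dtree', clf5), ('voting', eclf)]:
        scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
        print(f"{label}: {scores.mean():.3f} (+/- {scores.std():.3f})")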
    

    PyTorch BP network

    import torch
    import torch.nn.functional as Fun
    
    train_x = torch.FloatTensor(X_train)
    train_y = torch.LongTensor(y_train)
    test_x = torch.FloatTensor(X_test)
    test_y = torch.LongTensor(y_test)
    
    # BP (back-propagation) network: a single-hidden-layer MLP
    class Net(torch.nn.Module):
        def __init__(self, n_feature, n_hidden, n_output):
            super(Net, self).__init__()
            self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
            self.out = torch.nn.Linear(n_hidden, n_output)       # output layer

        def forward(self, x):
            x = Fun.relu(self.hidden(x))      # ReLU activation on the hidden layer (sigmoid or tanh would also work)
            x = self.out(x)                   # no activation on the output; CrossEntropyLoss expects raw logits
            return x

    net = Net(n_feature=8, n_hidden=20, n_output=2)    # n_feature: input dimension, n_hidden: hidden units, n_output: number of classes
    optimizer = torch.optim.SGD(net.parameters(), lr=0.02)   # stochastic gradient descent
    loss_func = torch.nn.CrossEntropyLoss()                  # cross-entropy loss

    loss_record = []
    for t in range(100):
        out = net(train_x)                 # forward pass on the whole training set
        loss = loss_func(out, train_y)     # compare predictions with the labels
        loss_record.append(loss.item())
        optimizer.zero_grad()   # clear accumulated gradients
        loss.backward()         # back-propagate
        optimizer.step()        # update the parameters

    out = net(test_x)                 # raw logits; Fun.softmax(out) would turn them into probabilities
    prediction = torch.max(out, 1)[1] # [1] gives the index of the max (the predicted class); [0] would give the max value
    pred_y = prediction.data.numpy()
    target_y = test_y.data.numpy()
    
    accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)
    print(accuracy)
    
    0.8214285714285714
    
    ## Training loss curve
    ax = sns.lineplot(data = loss_record)
    plt.show()
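
    The loop above takes one gradient step per epoch on all 560 training rows at once, which is fine for a table this small. For larger datasets the same network can be trained in mini-batches; a hedged sketch using torch.utils.data (batch size 64 and 100 epochs are arbitrary choices, not values from the original):

    from torch.utils.data import TensorDataset, DataLoader

    loader = DataLoader(TensorDataset(train_x, train_y), batch_size=64, shuffle=True)
    for epoch in range(100):
        for xb, yb in loader:
            optimizer.zero_grad()            # clear gradients from the previous step
            loss = loss_func(net(xb), yb)    # forward pass on one mini-batch
            loss.backward()                  # back-propagate
            optimizer.step()                 # update the weights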
    

    Saving and loading the model

    torch.save(net, 'net.pt')   # pickle the whole module to disk
    
    netC = torch.load('net.pt')
    output = netC(test_x)                           # logits from the reloaded model
    pred_y = torch.max(output, 1)[1].data.numpy()   # predicted classes from the reloaded model
    accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)
    print(accuracy)
    
    0.8214285714285714
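
    torch.save(net, ...) pickles the whole module, which ties the saved file to this exact class definition and file layout. The alternative PyTorch recommends is to save only the state_dict and load it into a freshly built Net; a short sketch (the file name net_state.pt is arbitrary):

    # Save only the learned parameters
    torch.save(net.state_dict(), 'net_state.pt')

    # Rebuild the architecture, then load the weights into it
    net2 = Net(n_feature=8, n_hidden=20, n_output=2)
    net2.load_state_dict(torch.load('net_state.pt'))
    net2.eval()   # switch to inference mode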
    
    
    