• logistics二分类


    binary classification

    #DATASET: https://archive.ics.uci.edu/ml/datasets/Glass+Identification
    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    import sklearn
    import sklearn.preprocessing as pre
    df=pd.read_csv('dataglassiglass.data')
    df.head()
      id RI Na Mg Al Si K Ca Ba Fe class
    0 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.0 0.0 1
    1 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.0 0.0 1
    2 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.0 0.0 1
    3 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.0 0.0 1
    4 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.0 0.0 1
    X,y=df.iloc[:,1:-1],df.iloc[:,-1]
    X,y=np.array(X),np.array(y)

    # Re-encode the class labels in place as consecutive integers 0..K-1,
    # following the ascending order of the original label values.
    # Walking the labels in ascending order keeps the in-place rewrite safe
    # for the positive integer labels used here: every new index is smaller
    # than the label it replaces, so an entry that was already rewritten can
    # never be matched again by a later (larger) label.
    for idx, class_name in enumerate(np.unique(y)):
        y[y == class_name] = idx

    y
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
           4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
           5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], dtype=int64)
    # Collapse the labels to a binary target, in place: entries equal to 1
    # stay 1 (the positive class) and every other entry becomes 0.
    # After the re-encoding above, index 1 corresponds to original class 2.
    # A boolean mask does this in one vectorised assignment instead of a
    # Python-level loop over every element.
    y[y != 1] = 0
    #split the dataset randomly into training and test sets
    
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.15,random_state=44)
    X_train.shape,y_train.shape,X_test.shape,y_test.shape
    ((181, 9), (181,), (33, 9), (33,))
    f_mean=np.mean(X_train,axis=0)
    f_std=np.std(X_train,axis=0)
    f_mean,f_std
    (array([1.51832884e+00, 1.33736464e+01, 2.69287293e+00, 1.46425414e+00,
            7.26391160e+01, 5.17016575e-01, 8.95314917e+00, 1.71104972e-01,
            6.02762431e-02]),
     array([0.00300427, 0.79769555, 1.42353328, 0.49169919, 0.77056863,
            0.69105168, 1.42892902, 0.5002639 , 0.10131419]))
    #standardize both sets using the training-set mean and std
    
    X_train=(X_train-f_mean)/f_std
    X_test=(X_test-f_mean)/f_std

    theta = np.zeros((X_train.shape[1] + 1))
    theta.shape
    (10,)
    #prepend a column of ones for the bias (intercept) term
    
    X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
    X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)
    X_train.shape,X_test.shape,theta.shape
    ((181, 10), (33, 10), (10,))
    #initialize the parameter
    
    np.random.seed(42)
    theta = np.random.rand(*theta.shape)
    theta
    array([0.37454012, 0.95071431, 0.73199394, 0.59865848, 0.15601864,
           0.15599452, 0.05808361, 0.86617615, 0.60111501, 0.70807258])
    # Batch gradient descent for logistic regression.
    #   scores : linear combination X_train . theta
    #   prob   : sigmoid of the scores (the hypothesis function)
    #   loss   : mean binary cross-entropy between prob and y_train
    #   grad   : gradient of that mean loss with respect to theta
    num_epoch = 500000
    learning_rate = 0.01
    report_every = 100000
    n_samples = y_train.size
    for epoch in range(num_epoch):
        scores = np.dot(X_train, theta)
        prob = 1 / (1 + np.exp(-scores))
        loss = (-y_train * np.log(prob) - (1 - y_train) * np.log(1 - prob)).mean()
        grad = np.dot(prob - y_train, X_train) / n_samples
        theta = theta - learning_rate * grad
        # Report progress only at the reporting interval.
        if not epoch % report_every:
            print('Epoch={}	Loss={}'.format(epoch, loss))
    Epoch=0	Loss=0.9770836920534414
    Epoch=100000	Loss=0.5884129057196792
    Epoch=200000	Loss=0.5828823869347305
    Epoch=300000	Loss=0.5798937167992417
    Epoch=400000	Loss=0.5782071252958373
    h_test = 1 / (1 + np.exp(-np.dot(X_test, theta)))
    
    #accuracy
    ((h_test > 0.5) == y_test).sum() / y_test.size
    0.8484848484848485
  • 相关阅读:
    C#学习(五)- 正则表达式等
    C#学习(四)
    C#学习(三)
    C#学习(二)
    终于装好了VS2013,开始!(一)
    简短的开始,C#学习分享地。
    java虚拟机之虚拟机类加载机制
    在用mybatis向MySQL数据库中插入时间时报错:Incorrect datetime value: '' for column '' at row 1
    什么是高并发 ,一些常见的处理方式
    基本类型和引用类型的区别
  • 原文地址:https://www.cnblogs.com/runsdeep/p/11542208.html
Copyright © 2020-2023  润新知