• [学习笔记] [Python机器学习 预测分析核心算法][3.3 度量预测模型性能]


    直接上我的思维导图吧:

    度量预测模型性能

    学习的代码及备注

      1 __author__ = 'mike-bowles'
      2 #use scikit learn package to perform linear regression
      3 #read in the rocks versus mines data set from uci.edu data repository
      4 # import urllib2  我没有用网上的数据,是先下载下来,直接本地读取的,所以取消掉了。
      5 import numpy
      6 import random
      7 from sklearn import datasets, linear_model
      8 from sklearn.metrics import roc_curve, auc
      9 import pylab as pl
     10 
     11 
     12 def confusionMatrix(predicted, actual, threshold): # predicted 利用回归模型后X输入得到的值, 
     13 #                                                    actual X所对应的真实的值, threshold 阈值
     14     if len(predicted) != len(actual): return -1 #判断测试的数量与实际的数量是否一致
     15     tp = 0.0
     16     fp = 0.0
     17     tn = 0.0
     18     fn = 0.0
     19     for i in range(len(actual)):
     20         if actual[i] > 0.5: #labels that are 1.0  (positive examples) 
     21                             # 为什么是>0.5,我想了好久,最后发现因为本例中为M时赋值1,为R时赋值0
     22             if predicted[i] > threshold:
     23                 tp += 1.0 #correctly predicted positive
     24             else:
     25                 fn += 1.0 #incorrectly predicted negative
     26         else:              #labels that are 0.0 (negative examples)
     27             if predicted[i] < threshold:
     28                 tn += 1.0 #correctly predicted negative
     29             else:
     30                 fp += 1.0 #incorrectly predicted positive
     31     rtn = [tp, fn, fp, tn]
     32     return rtn
     33 
     34 
     35 #read data from uci data repository
     36 # target_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
     37 # data = urllib2.urlopen(target_url)
     38 # 这里我改成了:
     39 target_url_file = open('sonar.all-data','r')
     40 data = target_url_file.readlines()
     41 target_url_file.close()
     42 
     43 
     44 #arrange data into list for labels and list of lists for attributes
     45 xList = []
     46 labels = []
     47 for line in data:
     48     #split on comma
     49     row = line.strip().split(",")
     50     #assign label 1.0 for "M" and 0.0 for "R"
     51     if(row[-1] == 'M'):
     52         labels.append(1.0)
     53     else:
     54         labels.append(0.0)
     55     #remove label from row
     56     row.pop()
     57     #convert row to floats
     58     floatRow = [float(num) for num in row]
     59     xList.append(floatRow)  #注意了xList是一个含208个列表的列表
     60 
     61 #divide attribute matrix and label vector into training(2/3 of data) and test sets (1/3 of data)
     62 indices = range(len(xList))
     63 xListTest = [xList[i] for i in indices if i%3 == 0 ]
     64 xListTrain = [xList[i] for i in indices if i%3 != 0 ]
     65 labelsTest = [labels[i] for i in indices if i%3 == 0]
     66 labelsTrain = [labels[i] for i in indices if i%3 != 0]
     67 
     68 #form list of list input into numpy arrays to match input class for scikit-learn linear model
     69 xTrain = numpy.array(xListTrain);    #将列表转为可以方便处理的numpy.array
     70 yTrain = numpy.array(labelsTrain); 
     71 xTest = numpy.array(xListTest); 
     72 yTest = numpy.array(labelsTest)
     73 
     74 #check shapes to see what they look like
     75 print("Shape of xTrain array", xTrain.shape)
     76 print("Shape of yTrain array", yTrain.shape)
     77 print("Shape of xTest array", xTest.shape)
     78 print("Shape of yTest array", yTest.shape)
     79 
     80 #train linear regression model
     81 rocksVMinesModel = linear_model.LinearRegression()  
     82 rocksVMinesModel.fit(xTrain,yTrain) #利用linear_model.LinearRegression()包进行线性回归
     83 
     84 #generate predictions on in-sample error
     85 trainingPredictions = rocksVMinesModel.predict(xTrain) #用训练集的X导入得到回归模型算出的预测值,注意不是yTrain
     86 print("Some values predicted by model", trainingPredictions[0:5], trainingPredictions[-6:-1])
     87 
     88 #generate confusion matrix for predictions on training set (in-sample) 
     89 # 一定理解 in-sample 和 out-of-sample 一个是训练集,一个是测试集
     90 confusionMatTrain = confusionMatrix(trainingPredictions, yTrain, 0.5)
     91 #pick threshold value and generate confusion matrix entries
     92 tp = confusionMatTrain[0]; fn = confusionMatTrain[1]; fp = confusionMatTrain[2]; tn = confusionMatTrain[3]
     93 
     94 print("tp = " + str(tp) + "	fn = " + str(fn) + "
    " + "fp = " + str(fp) + "	tn = " + str(tn) + '
    ') 
     95 # 根据输出理解	和
    ,并注意写法。
     96 
     97 #generate predictions on out-of-sample data 
     98 testPredictions = rocksVMinesModel.predict(xTest)  #用测试集数据导入训练好的模型
     99 
    100 #generate confusion matrix from predictions on out-of-sample data
    101 conMatTest = confusionMatrix(testPredictions, yTest, 0.5)
    102 #pick threshold value and generate confusion matrix entries
    103 tp = conMatTest[0]; fn = conMatTest[1]; fp = conMatTest[2]; tn = conMatTest[3]
    104 print("tp = " + str(tp) + "	fn = " + str(fn) + "
    " + "fp = " + str(fp) + "	tn = " + str(tn) + '
    ')
    105 
    106 #generate ROC curve for in-sample
    107 
    108 fpr, tpr, thresholds = roc_curve(yTrain,trainingPredictions)
    109 roc_auc = auc(fpr, tpr)
    110 print( 'AUC for in-sample ROC curve: %f' % roc_auc)
    111 
    112 # Plot ROC curve
    113 pl.clf()
    114 pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    115 pl.plot([0, 1], [0, 1], 'k--')
    116 pl.xlim([0.0, 1.0])
    117 pl.ylim([0.0, 1.0])
    118 pl.xlabel('False Positive Rate')
    119 pl.ylabel('True Positive Rate')
    120 pl.title('In sample ROC rocks versus mines')
    121 pl.legend(loc="lower right")
    122 pl.show()
    123 
    124 #generate ROC curve for out-of-sample
    125 fpr, tpr, thresholds = roc_curve(yTest,testPredictions)
    126 roc_auc = auc(fpr, tpr)
    127 print( 'AUC for out-of-sample ROC curve: %f' % roc_auc)
    128 
    129 # Plot ROC curve
    130 pl.clf()
    131 pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    132 pl.plot([0, 1], [0, 1], 'k--')
    133 pl.xlim([0.0, 1.0])
    134 pl.ylim([0.0, 1.0])
    135 pl.xlabel('False Positive Rate')
    136 pl.ylabel('True Positive Rate')
    137 pl.title('Out-of-sample ROC rocks versus mines')
    138 pl.legend(loc="lower right")
    139 pl.show()
  • 相关阅读:
    SpringMVC视图解析器
    FreeMarker介绍
    一篇很全面的freemarker教程
    request,session,application
    nav布局 在线演示 DIVCSS5
    opacity
    java 某字符串在另一字符串中是否存在
    bochs 使用讲解
    使用VS2015搭建Lua开发环境
    Zip文件格式
  • 原文地址:https://www.cnblogs.com/jaysonguan/p/12381197.html
Copyright © 2020-2023  润新知