准确率
# Train a linear-kernel SVM on the terrain data and expose its test accuracy.
#
# The import order below is the original one.  class_vis is imported ahead of
# matplotlib.pyplot; whether that ordering matters is not visible from this
# file, so it is preserved rather than regrouped.
import sys
from class_vis import prettyPicture
from prep_terrain_data import makeTerrainData

import matplotlib.pyplot as plt
import copy
import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()


########################## SVM #################################
### we handle the import statement and SVC creation for you here
from sklearn.svm import SVC
clf = SVC(kernel="linear")
clf.fit(features_train, labels_train)

#### now your job is to fit the classifier
#### using the training features/labels, and to
#### make a set of predictions on the test data
predictions = clf.predict(features_test)

#### store your predictions in a list named pred
pred = predictions

from sklearn.metrics import accuracy_score
# accuracy_score is documented as (y_true, y_pred): ground-truth labels go
# first, predictions second.
acc = accuracy_score(labels_test, pred)


def submitAccuracy():
    """Return the accuracy of the fitted classifier on the test set."""
    return acc
把非数字的列特征转换成数字
def preprocess_features(X):
    """Convert the non-numeric columns of a DataFrame into numeric ones.

    Object-dtype columns first have their 'yes'/'no' values replaced by
    1/0.  Any column that is still object-dtype after that replacement is
    expanded into dummy (indicator) columns prefixed with the original
    column name, e.g. 'school' => 'school_GP' and 'school_MS'.  All other
    columns are carried over as they are.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table to convert.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed like ``X`` that holds the converted columns in
        their original order.
    """
    # Start from an empty frame that shares X's index so that each converted
    # column can be joined onto it.
    output = pd.DataFrame(index=X.index)

    # Walk the columns by position: DataFrame.iteritems() was removed in
    # pandas 2.0, whereas positional selection works on old and new releases
    # alike.
    for position, col in enumerate(X.columns):
        col_data = X.iloc[:, position]

        # Non-numeric column: turn yes/no answers into 1/0.
        if col_data.dtype == object:
            col_data = col_data.replace(['yes', 'no'], [1, 0])
            # NOTE(review): the check below relies on replace() handing back
            # a numeric dtype once every value has been replaced.  Recent
            # pandas releases deprecate that automatic downcasting - confirm
            # against the pandas version in use.

        # Deliberately tested a second time: a column that is still
        # object-dtype after the yes/no replacement is treated as categorical
        # and expanded into dummy variables.
        if col_data.dtype == object:
            # Example: 'school' => 'school_GP' and 'school_MS'
            col_data = pd.get_dummies(col_data, prefix=col)

        # Collect the revised column(s).
        output = output.join(col_data)

    return output


X_all = preprocess_features(X_all)
# A parenthesised single-argument print runs under both Python 2 and Python 3.
print("Processed feature columns ({} total features): {}".format(
    len(X_all.columns), list(X_all.columns)))