skl4
# -*- coding: utf-8 -*-
"""Train a k-nearest-neighbours classifier on the iris dataset.

The script splits iris into a training and a test part, fits a
``KNeighborsClassifier`` on the training part, then prints the predicted
labels, the true labels and the number of misclassified test samples.
"""
import numpy as np


def count_misclassified(y_pred, y_true):
    """Return how many entries of *y_pred* differ from *y_true*.

    The labels are compared element-wise. Summing the raw differences
    (``np.sum(y_pred - y_true)``) is not a usable error measure because an
    error of +1 and an error of -1 cancel out and look like a perfect score.
    """
    return int(np.sum(np.asarray(y_pred) != np.asarray(y_true)))


def main():
    """Fit KNN on a random 70/30 split of iris and report the test errors."""
    # scikit-learn is imported here so that count_misclassified() stays
    # importable with NumPy alone.
    from sklearn import datasets
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    # print(iris_X[:2, :])
    # print(iris_y)

    # train_test_split shuffles the samples before splitting them.
    X_train, X_test, y_train, y_test = train_test_split(
        iris_X, iris_y, test_size=0.3)
    # print(y_train)

    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)  # training happens here; knn is fitted afterwards

    y_pre = knn.predict(X_test)  # predict once and reuse the result
    print(y_pre)
    print(y_test)
    print(count_misclassified(y_pre, y_test))


if __name__ == "__main__":
    main()
skl5
使用数据
# -*- coding: utf-8 -*-
"""Fit an ordinary least-squares model on the Boston housing data.

Prints the model's predictions for the first four samples followed by the
true targets of those same samples.
"""

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2.
# On newer releases substitute another bundled regression dataset, e.g.
# datasets.load_diabetes().
loaded_data = datasets.load_boston()
data_X = loaded_data.data  # feature matrix (one column per attribute)
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

print(model.predict(data_X[:4, :]))
print(data_y[:4])
自己创建数据
"""Generate a synthetic one-feature regression problem and plot it."""
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# 100 samples with a single feature and a single target; `noise` is the
# standard deviation of the Gaussian noise added to the output.
X, y = datasets.make_regression(n_samples=100,
                                n_features=1,
                                n_targets=1,
                                noise=1)

plt.scatter(X, y)
plt.show()
skl6
属性
"""Fit a linear regression on the Boston housing data and print its score."""
from sklearn import datasets
from sklearn.linear_model import LinearRegression

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2.
# On newer releases substitute another bundled regression dataset, e.g.
# datasets.load_diabetes().
loaded_data = datasets.load_boston()
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

print(model.predict(data_X[:4, :]))
# score() returns R^2, the coefficient of determination; the original notes
# recorded 0.7406 for this fit.
print(model.score(data_X, data_y))
skl7
normalization
# Minimal normalization recipe: rescale the features with preprocessing.scale.
# X is not defined in this fragment; it must already hold the feature matrix.
from sklearn import preprocessing
X = preprocessing.scale(X)
"""Scale a synthetic two-feature dataset and score an SVM classifier on it."""
from sklearn import preprocessing
import numpy as np
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# sklearn.datasets.samples_generator was removed in scikit-learn 0.24;
# make_classification is exported directly from sklearn.datasets.
from sklearn.datasets import make_classification
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Small hand-made matrix for trying out preprocessing.scale interactively.
a = np.array([[10, 2.7, 3.6],
              [-100, 5, -2],
              [120, 20, 40]], dtype=np.float64)
# print(a)
# print(preprocessing.scale(a))

X, y = make_classification(n_samples=300, n_features=2,
                           n_redundant=0, n_informative=2,
                           random_state=22, n_clusters_per_class=1,
                           scale=100)
# plt.scatter(X[:, 0], X[:, 1], c=y)
# plt.show()

# Alternative scaling to the [0, 1] range:
# X = preprocessing.minmax_scale(X, feature_range=(0, 1))
X = preprocessing.scale(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
clf = SVC()
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))