KNN算法为按距离进行分类的,对于已知的分类,根据欧式距离,最靠近那个分类就被预测为那个分类。
本文只是简单展示一下实现代码,具体的特征和分类,还得自己根据实际场景去调整。
在开始之前注意看看导入的包是否都存在,如不存在的化,请先安装相应的包
# -*- coding:utf-8 -*- import numpy as np from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.model_selection import cross_val_score import matplotlib.pyplot as plt import joblib import pandas as pd #加载iris数据集 iris = datasets.load_iris() iris_X = iris.data iris_y = iris.target #print(iris_X[:4,:]) #数据分割 X_train,X_test,y_train,y_test = train_test_split(iris_X,iris_y,test_size=1/3,random_state=3) #==========交叉验证============================ #cv_scores = [] #k_range = range(1,31) ''' #此处为交叉验证,看KNN的k取什么值的时候效果最好 for n in k_range: knn = KNeighborsClassifier(n_neighbors=n) scores = cross_val_score(knn,X_train,y_train,cv=10,scoring='accuracy') cv_scores.append(scores.mean()) plt.plot(k_range,cv_scores) plt.xlabel('K') plt.ylabel('Accuracy') plt.show() ''' #模型训练 ''' best_knn = KNeighborsClassifier(n_neighbors=3) # 选择最优的K=3传入模型 best_knn.fit(X_train,y_train) #训练模型 print(best_knn.score(X_test,y_test)) #看看评分 #模型本地保存 joblib.dump(best_knn, 'D:/Users/wangkangren729/PycharmProjects/iris/model/best_knn.pkl',compress=3) #load model ''' bknn = joblib.load('D:/Users/wangkangren729/PycharmProjects/iris/model/best_knn.pkl') #读取本地新数据 data = pd.read_csv('predict.data') #print(data.head(5)) attributes=data[['sl','sw','pl','pw']] #前四列属性简化为sl,sw,pl,pw types=data['type'] #第5列属性为鸢尾花的类别 #print(type(attributes)) #data_frame = attributes.loc[0,:].to_frame() #print(attributes) #print(type(attributes[i])) #预测新数据 print(bknn.predict(attributes)) #print(type([[4.1, 2.2, 2.3, 5.4]])) #print([[4.1, 2.2, 2.3, 5.4]]) #print(bknn.predict([[4.1, 2.2, 2.3, 5.4]])) #print(types) #print(bknn.predict(attributes))