1.下面的代码是上一篇理论中的小例子
from sklearn.neighbors import KNeighborsClassifier # K近邻分类器
from sklearn.datasets import load_iris # 鸢尾花数据
from sklearn.tree import DecisionTreeClassifier #决策树分类器
from sklearn.model_selection import cross_val_score #交叉验证值函数
from sklearn.naive_bayes import GaussianNB #朴素贝叶斯分类器
import numpy as np #科学计算库
# The small examples below run in the same order as the imports above.

# --- k-nearest neighbours on four one-dimensional samples ---
samples = [[0], [1], [2], [3]]
targets = [0, 0, 1, 1]
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(samples, targets)
print("+++++K近邻+++++")
print(knn_model.predict([[1.2]]))

# --- 10-fold cross-validation of a decision tree on the iris data ---
tree_model = DecisionTreeClassifier()
iris = load_iris()
fold_scores = cross_val_score(tree_model, iris.data, iris.target, cv=10)
print("+++++交叉验证+++++")
print(fold_scores)

# --- the same decision tree, now fitted on the toy samples ---
print("+++++决策树+++++")
tree_model.fit(samples, targets)
print(tree_model.predict([[2.2]]))

# --- Gaussian naive Bayes on six two-dimensional points, two classes ---
points = np.array([[-1, -1], [-2, -1], [-3, -2], [2, 1], [1, 1], [3, 2]])
point_classes = np.array([1, 1, 1, 2, 2, 2])
bayes_model = GaussianNB(priors=None)
bayes_model.fit(points, point_classes)
bayes_prediction = bayes_model.predict([[-0.8, -1]])
print("+++++朴素毕贝叶斯+++++")
print(bayes_prediction)
2.结果
+++++K近邻+++++
[0]
+++++交叉验证+++++
[ 1. 0.93333333 1. 0.93333333 0.93333333 0.86666667
0.93333333 0.93333333 1. 1. ]
+++++决策树+++++
[1]
+++++朴素毕贝叶斯+++++
[1]
3.利用mooc给的feature数据实践
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report  # precision, recall and F1 report
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.preprocessing import Imputer  # missing-value imputation
except ImportError:
    # Imputer was removed in scikit-learn 0.22.
    Imputer = None
try:
    from sklearn.cross_validation import train_test_split  # split / shuffle helper
except ImportError:
    # sklearn.cross_validation was removed in scikit-learn 0.20.
    from sklearn.model_selection import train_test_split
def load_datasets(feature_path, lable_path):
    """Load and stack feature files and label files into two NumPy arrays.

    Each feature file is read as header-less, comma-separated text in which
    "?" marks a missing value. Missing values are replaced by the mean of
    their column, computed separately for every file. Each label file is
    read as header-less text with pandas' default tab separator.

    Args:
        feature_path: iterable of paths to feature files; every file must
            contain 41 columns.
        lable_path: iterable of paths to label files (one column each).

    Returns:
        A ``(feature, lable)`` tuple: ``feature`` is a float array of shape
        ``(n_rows, 41)`` and ``lable`` is the flattened 1-D label array.
        With no input files the shapes are ``(0, 41)`` and ``(0,)``.

    Raises:
        ValueError: if a feature file has a column made up only of missing
            values, or if a file's column count does not match.
    """
    n_features = 41

    # Seed each list with an empty array so that an empty path list still
    # yields correctly shaped output and a wrong column count is rejected
    # by np.concatenate.
    feature_parts = [np.empty((0, n_features))]
    for path in feature_path:
        frame = pd.read_csv(path, na_values="?", header=None)
        if frame.isna().all().any():
            # A column without any observed value has no mean to fill in.
            raise ValueError(
                "feature file %r has a column with only missing values" % (path,)
            )
        filled = frame.fillna(frame.mean())
        feature_parts.append(np.asarray(filled, dtype=float))
    feature = np.concatenate(feature_parts)

    label_parts = [np.empty((0, 1))]
    for path in lable_path:
        label_parts.append(np.asarray(pd.read_table(path, header=None)))
    lable = np.ravel(np.concatenate(label_parts))

    return feature, lable
if __name__ == '__main__':
    # Locations of the feature files and their matching label files.
    featurepaths = [
        '/A/A.feature',
        '/B/B.feature',
        '/C/C.feature',
        '/D/D.feature',
        '/E/E.feature',
    ]
    labelPaths = [
        '/A/A.label',
        '/B/B.label',
        '/C/C.label',
        '/D/D.label',
        '/E/E.label',
    ]

    # The first four data sets are used for training, the last one for testing.
    x_train, y_train = load_datasets(featurepaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurepaths[4:], labelPaths[4:])

    # Shuffle the training rows. A permutation is used instead of
    # train_test_split(test_size=0.0), which current scikit-learn rejects
    # with a ValueError.
    order = np.random.permutation(len(x_train))
    x_train, y_train = x_train[order], y_train[order]

    # Train the three classifiers one after another and keep their predictions.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]
    predictions = []
    for name, model in classifiers:
        print('Start training ' + name)
        model.fit(x_train, y_train)
        print('Training done')
        predictions.append((name, model.predict(x_test)))
        print('Prediction done')

    # Print a text report of precision, recall and F1 for every classifier.
    for name, answer in predictions:
        print('\nThe classification report for ' + name + ':')
        print(classification_report(y_test, answer))