本文选择的任务是手写数字识别。
模型采用 sklearn.ensemble.RandomForestClassifier(随机森林分类器)。
下面贴上代码:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the labelled training set, the unlabelled test set and the sample
# submission file.
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')
# Not referenced by the code below; kept so the name stays available.
clean_data = pd.read_csv('data/sample_submission.csv')

# Hold out 20% of the labelled rows for validation; the fixed seed makes the
# split repeatable.
train, validate = train_test_split(train_data, test_size=0.2, random_state=0)
train_x = train.drop('label', axis=1)
train_y = train.label
# NOTE: despite their names, test_x / test_y are the validation split of
# train_data, not the unlabelled test_data.
test_x = validate.drop('label', axis=1)
test_y = validate.label

model = RandomForestClassifier(random_state=0, n_estimators=500)
model.fit(train_x, train_y)

# Score the model on the held-out rows. The value is printed because a bare
# expression is silently discarded when this runs as a script.
predict = model.predict(test_x)
accuracy = accuracy_score(test_y, predict)
print(f'Validation accuracy: {accuracy:.4f}')

# Predict the unlabelled test set and write the submission file, numbering
# the rows from 1 in the 'ImageId' column.
prediction = model.predict(test_data)
submission = pd.DataFrame({
    'ImageId': range(1, len(prediction) + 1),
    'Label': prediction,
})
submission.to_csv('submission.csv', index=False, header=True)
绘图(对比验证集前 100 个样本的真实标签与预测结果):
# Overlay the first 100 validation labels ('bo' markers) and the matching
# model predictions ('r+' markers) on one figure, so mismatches show up as
# markers that do not coincide.
for values, marker in ((test_y, 'bo'), (predict, 'r+')):
    head = values[:100]
    plt.plot(range(len(head)), head, marker)
plt.show()