原生形式使用Xgboost(import xgboost as xgb)
from sklearn import datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
import numpy as np
from sklearn.metrics import precision_score, recall_score
# 加载数据
iris = datasets.load_iris()
X = iris.data
y = iris.target
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))
# 转换为DMatrix数据格式
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
# 设置参数
parameters = {
'eta': 0.3,
'silent': True, # option for logging
'objective': 'multi:softprob', # error evaluation for multiclass tasks
'num_class': 3, # number of classes to predic
'max_depth': 3 # depth of the trees in the boosting process
}
num_round = 20 # the number of training iterations
# 模型训练
bst = xgb.train(parameters, dtrain, num_round)
# 模型预测
preds = bst.predict(dtest)
print(preds[:5])
# 选择表示最高概率的列
best_preds = np.asarray([np.argmax(line) for line in preds])
print(best_preds)
# 模型评估
print(precision_score(y_test, best_preds, average='macro')) # 精准率
print(recall_score(y_test, best_preds, average='macro')) # 召回率
Sklearn接口形式使用Xgboost(from xgboost import XGBClassifier)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score
# 加载数据
iris = datasets.load_iris()
X = iris.data
y = iris.target
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))
# 模型训练
model = XGBClassifier(
learning_rate=0.01,
n_estimators=3000,
max_depth=4,
min_child_weight=5,
gamma=0.1,
subsample=0.8,
colsample_bytree=0.8,
reg_alpha=1,
objective='binary:logistic',
nthread=8,
scale_pos_weight=1,
seed=27
)
model.fit(X_train, y_train)
# 预测
y_pred = model.predict(X_test)
# 模型评估
print(precision_score(y_test, y_pred, average='macro')) # 精准率
print(recall_score(y_test, y_pred, average='macro')) # 召回率