数据特征如下
使用随机逻辑回归（RandomizedLogisticRegression）进行稳定性选择以筛选特征，再用逻辑回归建模：
# Screen the predictor columns with stability selection (randomized logistic
# regression), then fit an ordinary logistic regression on the retained
# columns and report its accuracy.
#
# numpy, pyecharts and xlrd are not referenced by name below; the imports are
# kept because other parts of the file (or the Excel reader) may rely on them.
import pandas as pd
import numpy as np
import pyecharts
import xlrd

from sklearn.linear_model import LogisticRegression as LR
# NOTE(review): RandomizedLogisticRegression was deprecated in scikit-learn
# 0.19 and removed in 0.21, so this import only works on older releases --
# confirm the pinned scikit-learn version.
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# NOTE(review): this raw-string path contains no directory separators and the
# file name starts with "ankloan"; the backslashes (and a leading "b") were
# presumably lost when the snippet was copied -- confirm the real location.
file = r'F:数据分析专用数据分析与机器学习ankloan.xls'
data = pd.read_excel(file)

# Columns 0-7 are used as the predictors, column 8 as the label vector.
validate_feature = data.iloc[:, :8]
x = validate_feature.values
y = data.iloc[:, 8].values

# Fit the randomized model once and reuse its boolean support mask instead of
# calling get_support() repeatedly.
rlr = RLR()
rlr.fit(x, y)
support_mask = rlr.get_support()
selected_columns = validate_feature.columns[support_mask]
print(u'有效特征为:%s' % ','.join(selected_columns))

# Refit a plain logistic regression using only the selected columns.
x = data[selected_columns].values
lr = LR()
lr.fit(x, y)
# The score is computed on the same rows the model was fitted on.
print(u'模型的平均正确率:%s' % lr.score(x, y))