print "Performing greedy feature selection..." score_hist = [] N = 10 good_features = set([]) # Greedy feature selection loop while len(score_hist) < 2 or score_hist[-1][0] > score_hist[-2][0]: scores = [] for f in range(len(Xts)): if f not in good_features: feats = list(good_features) + [f] Xt = sparse.hstack([Xts[j] for j in feats]).tocsr() score = cv_loop(Xt, y, model, N) scores.append((score, f)) print "Feature: %i Mean AUC: %f" % (f, score) good_features.add(sorted(scores)[-1][1]) score_hist.append(sorted(scores)[-1]) print "Current features: %s" % sorted(list(good_features))
注意还没结束:
# Remove last added feature from good_features good_features.remove(score_hist[-1][1])
from kaggle