# Classifiers: compare several scikit-learn models on lagged percentage returns.
import datetime

import numpy as np
import pandas as pd
import sklearn
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC, SVC

# NOTE(review): `web` (used by create_lagged_series) is not imported in the
# visible source; presumably `import pandas_datareader.data as web` -- confirm.
def create_lagged_series(symbol, start_date, end_date, lags=5, prices=None):
    """Build a frame of percentage returns, lagged returns and direction.

    Args:
        symbol: Ticker passed to ``web.DataReader`` when ``prices`` is None.
        start_date: First date (``datetime.datetime``) kept in the result.
        end_date: Last date requested from the data source.
        lags: Number of lagged-return columns (``Lag1`` .. ``Lag<lags>``).
        prices: Optional pre-loaded price frame with a date index and
            ``"AdjClose"`` and ``"Volume"`` columns. When given, nothing is
            downloaded and ``symbol``/``end_date`` are not used.

    Returns:
        A ``pandas.DataFrame`` indexed by date (rows on or after
        ``start_date``) with columns ``Volume``, ``Today`` (percentage
        return), ``Lag1`` .. ``Lag<lags>`` (percentage returns of the shifted
        price series) and ``Direction`` (sign of ``Today``).
    """
    if prices is None:
        # Request one extra year before start_date so the lagged columns
        # already hold values for the first rows that are kept.
        # NOTE(review): `web` must exist at module level; presumably it is
        # pandas_datareader's data module -- confirm.
        prices = web.DataReader(
            symbol, "quandl",
            start_date - datetime.timedelta(days=365),
            end_date,
        )
    ts = prices.sort_index()

    # Price levels: the current adjusted close plus the previous `lags` closes.
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["AdjClose"]
    tslag["Volume"] = ts["Volume"]
    for lag in range(1, lags + 1):
        tslag["Lag%d" % lag] = ts["AdjClose"].shift(lag)

    # Percentage returns derived from those price levels.
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    # Replace near-zero returns with a small positive value so that the sign
    # taken below is never exactly zero. NaN rows fail the comparison and are
    # left untouched.
    tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

    for lag in range(1, lags + 1):
        column = "Lag%d" % lag
        tsret[column] = tslag[column].pct_change() * 100.0

    # +1.0 / -1.0 label for the current row's return.
    tsret["Direction"] = np.sign(tsret["Today"])

    # Drop the warm-up rows that precede the requested start date.
    return tsret[tsret.index >= start_date]
if __name__ == '__main__':
    # Script entry point: build the lagged-return data set, split it by date
    # and report the hit rate and confusion matrix of each classifier.
    snpret = create_lagged_series(
        "AAPL.US", datetime.datetime(2001, 1, 10),
        datetime.datetime(2005, 12, 31), lags=5
    )

    # Predictors are the first two lagged returns; the response is the sign
    # of the current return.
    X = snpret[['Lag1', 'Lag2']]
    Y = snpret["Direction"]

    # Rows before start_test are used for fitting, the rest for evaluation.
    start_test = datetime.datetime(2005, 1, 1)
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    Y_train = Y[Y.index < start_test]
    Y_test = Y[Y.index >= start_test]

    print("Hit Rates/Confusion Matrices: ")
    models = [
        ('LR', LogisticRegression()),
        ('LDA', LDA()),
        ('QDA', QDA()),
        ("LSVC", LinearSVC()),
        ("RSVM", SVC(
            C=1000000.0, cache_size=200, class_weight=None,
            coef0=0.0, degree=3, gamma=0.0001, kernel='rbf',
            max_iter=-1, probability=False, random_state=None,
            shrinking=True, tol=0.001, verbose=False
        )),
        ('RF', RandomForestClassifier(
            n_estimators=1000, criterion='gini',
            max_depth=None, min_samples_split=2,
            min_samples_leaf=1,
            # 'sqrt' is what the former 'auto' setting meant for classifiers;
            # recent scikit-learn releases no longer accept 'auto'.
            max_features='sqrt',
            bootstrap=True, oob_score=False, n_jobs=1,
            random_state=None, verbose=0
        )),
    ]

    for name, model in models:
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        # score() is the mean accuracy on the test rows, i.e. the hit rate.
        print("%s: %0.3f" % (name, model.score(X_test, Y_test)))
        # confusion_matrix expects (y_true, y_pred): rows are the true
        # classes and columns the predicted ones.
        print("%s " % confusion_matrix(Y_test, pred))