• 统计模型应用--基本预测手法


    分类器:

    import datetime

    import numpy as np
    import pandas as pd
    import pandas_datareader.data as web
    import sklearn
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix
    from sklearn.svm import LinearSVC, SVC

    def create_lagged_series(symbol, start_date, end_date, lags=5, prices=None):
        """Build a DataFrame of daily percentage returns and their lags.

        Args:
            symbol: Ticker passed to ``web.DataReader`` (ignored when
                ``prices`` is supplied).
            start_date: First date (``datetime.datetime``) kept in the result.
            end_date: Last date (``datetime.datetime``) kept in the result.
            lags: Number of lagged return columns (``Lag1`` .. ``Lag<lags>``).
            prices: Optional pre-loaded DataFrame indexed by date with
                ``AdjClose`` and ``Volume`` columns. When ``None`` the data
                is downloaded from the "quandl" source, starting 365 days
                before ``start_date`` so the lagged columns have history.

        Returns:
            A DataFrame indexed by date with the columns ``Volume``,
            ``Today`` (percentage return), ``Lag1`` .. ``Lag<lags>``
            (percentage returns of the lagged prices) and ``Direction``
            (sign of ``Today``), restricted to ``start_date``..``end_date``.
            Rows that have no earlier price history contain NaN.
        """
        if prices is None:
            prices = web.DataReader(
                symbol, "quandl",
                start_date - datetime.timedelta(days=365),
                end_date,
            )
        # Lags and returns are only meaningful on a chronologically
        # ascending index.
        ts = prices.sort_index()

        # Price levels: today's adjusted close plus the previous `lags` closes.
        tslag = pd.DataFrame(index=ts.index)
        tslag["Today"] = ts["AdjClose"]
        tslag["Volume"] = ts["Volume"]
        for i in range(1, lags + 1):
            tslag["Lag%d" % i] = ts["AdjClose"].shift(i)

        # Percentage returns derived from the price levels.
        tsret = pd.DataFrame(index=tslag.index)
        tsret["Volume"] = tslag["Volume"]
        tsret["Today"] = tslag["Today"].pct_change() * 100.0

        # Replace (near-)zero returns with a small positive number so that
        # "Direction" is never 0. NaN compares False and is left untouched.
        # A single .loc assignment avoids chained-assignment pitfalls.
        tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

        for i in range(1, lags + 1):
            tsret["Lag%d" % i] = tslag["Lag%d" % i].pct_change() * 100.0

        tsret["Direction"] = np.sign(tsret["Today"])

        in_window = (tsret.index >= start_date) & (tsret.index <= end_date)
        return tsret[in_window]
    if __name__ == '__main__':
        # Build lagged percentage returns for the ticker over 2001-2005.
        snpret = create_lagged_series(
            "AAPL.US", datetime.datetime(2001, 1, 10),
            datetime.datetime(2005, 12, 31), lags=5
        )

        # Predict the direction of the day's return from the two most recent
        # lagged returns.
        X = snpret[['Lag1', 'Lag2']]
        Y = snpret["Direction"]

        # Chronological split: everything before 2005 trains, 2005 tests.
        start_test = datetime.datetime(2005, 1, 1)
        X_train = X[X.index < start_test]
        X_test = X[X.index >= start_test]
        Y_train = Y[Y.index < start_test]
        Y_test = Y[Y.index >= start_test]

        print("Hit Rates/Confusion Matrices: ")
        models = [
            ('LR', LogisticRegression()),
            ('LDA', LDA()),
            ('QDA', QDA()),
            ("LSVC", LinearSVC()),
            ("RSVM", SVC(
                C=1000000.0, cache_size=200, class_weight=None,
                coef0=0.0, degree=3, gamma=0.0001, kernel='rbf',
                max_iter=-1, probability=False, random_state=None,
                shrinking=True, tol=0.001, verbose=False
            )),
            # 'sqrt' is what 'auto' meant for classifiers; 'auto' is no longer
            # accepted by current scikit-learn releases.
            ('RF', RandomForestClassifier(
                n_estimators=1000, criterion='gini',
                max_depth=None, min_samples_split=2,
                min_samples_leaf=1, max_features='sqrt',
                bootstrap=True, oob_score=False, n_jobs=1,
                random_state=None, verbose=0)
            )]

        # Fit every model on the training window and report its hit rate and
        # confusion matrix on the test window.
        for name, model in models:
            model.fit(X_train, Y_train)
            pred = model.predict(X_test)
            print("%s: %0.3f" % (name, model.score(X_test, Y_test)))
            # confusion_matrix expects (y_true, y_pred): rows are true labels.
            print("%s " % confusion_matrix(Y_test, pred))

       

  • 相关阅读:
    基于JavaFXWJFXGameEngine游戏引擎介绍与进度
    进程线程与cpu绑定
    [Vim练级攻略] Vim基础操作
    hdu 2159 fate
    python的httplib注意事项
    SQL优化总结
    项目整体开发流程以及配置人员
    hdu 1010 解题报告 Tempter of the Bone
    在centos搭建git服务器时,不小心把/home/git目录删除了,我是怎么恢复的
    int 和bigint差别有多大?
  • 原文地址:https://www.cnblogs.com/kuku0223/p/11064558.html
Copyright © 2020-2023  润新知