• CNN运用在股票数据


    from __future__ import division
    from __future__ import print_function  
    import numpy as np
    import pandas as pd
    import matplotlib.pylab as plt
    import seaborn as sns
    import tensorflow as tf
    from tensorflow.python.ops import rnn, rnn_cell
    import tushare as ts
    
    def getData(id,start,end,num,flag):
        """Fetch historical rows for one stock and return them in reversed order.

        Parameters
        ----------
        id : stock code, passed straight through to ``ts.get_hist_data``.
        start, end : date-range bounds, passed straight through as well.
        num : exclusive upper bound of the row slice taken from the result.
        flag : the string ``"true"`` skips the first returned row (slice
            ``[1:num]``); any other value keeps it (slice ``[:num]``).

        Returns
        -------
        list of list
            One inner list per retained row holding that row's column values.
            The outer list is in the reverse of the order in which
            ``ts.get_hist_data`` returned the rows.
        """
        frame = ts.get_hist_data(id, start, end)
        first_row = 1 if flag == "true" else 0
        selected = frame[first_row:num]
        # Copy every row of the underlying array into a plain Python list.
        rows = [list(record) for record in np.array(selected)]
        # NOTE(review): presumably the source is newest-first, so reversing
        # yields chronological order -- confirm against tushare.
        rows.reverse()
        return rows
    
    
    def getDataR(id,start,end,num):
        """Build one-hot direction labels, one for every 10th row of the data.

        The relative change of the ``close`` column between each row and the
        row returned just before it is computed, truncated to the first
        ``num`` entries, reversed, and then sampled at positions 9, 19, 29, ...
        so that each label lines up with the end of a 10-row window.

        Parameters
        ----------
        id : stock code, passed straight through to ``ts.get_hist_data``.
        start, end : date-range bounds, passed straight through as well.
        num : number of leading entries of the change series that are kept.

        Returns
        -------
        list of list of int
            ``[1,0,0]`` when the change is positive, ``[0,1,0]`` when it is
            zero or negative, and ``[0,0,1]`` when neither comparison holds
            (i.e. the value is NaN).
        """
        df = ts.get_hist_data(id,start,end)
        P = df['close']
        # There is no data for the `end` day itself; this is the next day's
        # close relative to the current one.
        # NOTE(review): shift(1) pairs each row with the row returned before
        # it, which is the following day only if the data is newest-first --
        # confirm against tushare.
        changes = ((P.shift(1) - P) / P)[:num]
        changes = np.array(changes).tolist()
        changes.reverse()

        labels = []
        # Positions 9, 19, 29, ... are exactly those where (index + 1) % 10 == 0.
        for change in changes[9::10]:
            if change > 0:
                labels.append([1,0,0])
            elif change <= 0:
                labels.append([0,1,0])
            else:
                # Only reachable when `change` is NaN.
                labels.append([0,0,1])
        return labels
    
    # Stock codes to process; a single code is used here.
    df_sh = ["600016"]
    fac, ret = [], []
    facT, retT = [], []
    predFAC = []
    for ishare in df_sh:
        # Training span: feature rows and the labels derived from the same span.
        fac.extend(getData(ishare, '2008-07-22', '2016-08-01', 601, "true"))
        ret.extend(getDataR(ishare, '2008-07-22', '2016-08-01', 601))

        # Held-out span used for the out-of-sample accuracy figure.
        facT.extend(getData(ishare, '2016-08-01', '2017-01-19', 101, "true"))
        retT.extend(getDataR(ishare, '2016-08-01', '2017-01-19', 101))

        # Rows used for the final prediction; the first returned row is kept.
        predFAC.extend(getData(ishare, '2016-08-01', '2017-01-20', 11, "false"))

    fac = np.array(fac)
    ret = np.array(ret)
    # Column-wise mean and standard deviation come from the training rows only
    # and are reused for the held-out and prediction rows below.
    meanfac = fac.sum(axis=0) / len(fac)
    stdfac = fac.std(axis=0)
    fac = (fac - meanfac) / stdfac

    facT = (np.array(facT) - meanfac) / stdfac
    retT = np.array(retT)

    # Group consecutive rows into windows of 10; rows left over after the last
    # complete window are dropped.
    rows_per_window = 10
    usable = len(fac) - len(fac) % rows_per_window
    fac = np.array([fac[lo:lo + rows_per_window]
                    for lo in range(0, usable, rows_per_window)])
    usableT = len(facT) - len(facT) % rows_per_window
    facT = np.array([facT[lo:lo + rows_per_window]
                     for lo in range(0, usableT, rows_per_window)])

    predFAC = (np.asarray(predFAC) - meanfac) / stdfac
    
    
    # Optimisation settings.
    learning_rate = 0.001
    batch_size = 10
    display_step = 10
    print(int(fac.shape[0]))
    # Number of whole mini-batches that fit into the training windows.
    training_iters = fac.shape[0] // batch_size

    # Network parameters.
    n_steps = 10     # rows per input window
    n_input = 14     # values per row
    n_hidden = 1024
    n_classes = 3    # width of each one-hot label
    dropout = 0.8    # value fed to keep_prob while training

    # Graph inputs: feature windows, one-hot labels and the dropout keep probability.
    x = tf.placeholder('float', shape=[None, n_steps, n_input])
    y = tf.placeholder('float', shape=[None, n_classes])
    keep_prob = tf.placeholder(tf.float32)
    
    
    def CNN_Net_five(x,weights,biases,dropout=0.8,m=1):
        """Five-layer convolutional network followed by one dense layer.

        Parameters
        ----------
        x : input tensor; it is reshaped to ``[-1, 10, 14, 1]`` before use.
        weights : dict holding the kernels ``'wc1'`` .. ``'wc5'``, the dense
            matrix ``'wd1'`` and the output matrix ``'out'``.
        biases : dict holding ``'bc1'`` .. ``'bc5'``, ``'bd1'`` and ``'out'``.
        dropout : value handed to ``tf.nn.dropout`` as its second argument.
        m : stride applied along both spatial axes of every convolution.

        Returns
        -------
        Tensor with the un-normalised class scores (no softmax is applied).
        """
        net = tf.reshape(x, shape=[-1,10,14,1])

        # Convolution -> bias -> ReLU, repeated for layers 1 to 5.
        step = [1,m,m,1]
        for layer in range(1, 6):
            net = tf.nn.conv2d(net, weights['wc%d' % layer], strides=step, padding='SAME')
            net = tf.nn.relu(tf.nn.bias_add(net, biases['bc%d' % layer]))

        # Flatten to whatever input width the dense weight matrix expects.
        # NOTE(review): a stride other than 1 would presumably need a matching
        # 'wd1' shape -- confirm before changing m.
        flat_width = weights['wd1'].get_shape().as_list()[0]
        net = tf.reshape(net, [-1, flat_width])
        net = tf.nn.relu(tf.add(tf.matmul(net, weights['wd1']), biases['bd1']))

        net = tf.nn.dropout(net, dropout)
        # Output layer: class scores.
        return tf.add(tf.matmul(net, weights['out']), biases['out'])
    
    # Trainable parameters, every one initialised from tf.random_normal.
    # The tables are ordered lists so variables are created in a fixed order.
    _weight_shapes = [
        ('wc1', [5, 5, 1, 16]),
        ('wc2', [5, 5, 16, 32]),
        ('wc3', [5, 5, 32, 64]),
        ('wc4', [5, 5, 64, 32]),
        ('wc5', [5, 5, 32, 16]),
        # dense layer: n_steps*n_input*16 flattened inputs, 1024 outputs
        ('wd1', [n_steps*n_input*16, 1024]),
        ('out', [1024, n_classes]),
    ]
    weights = dict(
        (key, tf.Variable(tf.random_normal(dims))) for key, dims in _weight_shapes
    )

    _bias_widths = [
        ('bc1', 16),
        ('bc2', 32),
        ('bc3', 64),
        ('bc4', 32),
        ('bc5', 16),
        ('bd1', 1024),
        ('out', n_classes),
    ]
    biases = dict(
        (key, tf.Variable(tf.random_normal([width]))) for key, width in _bias_widths
    )
    
    
    # Class scores from the network; the dropout keep probability is fed at run time.
    pred = CNN_Net_five(x,weights,biases,dropout=keep_prob)
    # Keyword arguments are used because TensorFlow >= 1.0 rejects positional
    # arguments to softmax_cross_entropy_with_logits; older releases accept
    # the same keywords.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Labels are one-hot over 3 columns, so argmax along axis 1 gives the class
    # index; a sample counts as correct when predicted and true indices match.
    correct_pred = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
    # Cast the boolean matches to float and average them to get the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    init = tf.global_variables_initializer()
    
    # Train for a fixed number of passes, report accuracy, then print a direction call.
    with tf.Session() as sess:
        sess.run(init)
        print("**")
        print(training_iters)
        # Whole mini-batches per pass; windows that do not fill a batch are unused.
        n_batches = int(len(fac)/batch_size)
        for tr in range(15):
            for i in range(n_batches):
                begin = i*batch_size
                stop = begin + batch_size
                batch_x = fac[begin:stop].reshape([batch_size,n_steps,n_input])
                batch_y = ret[begin:stop].reshape([batch_size,n_classes])
                sess.run(optimizer,feed_dict={x:batch_x,y:batch_y,keep_prob:dropout})
                if(i%50==0):
                    print(i,'----',n_batches)
            # Metrics are taken on the last mini-batch of the pass. keep_prob is
            # 1.0 so dropout does not randomly perturb the reported numbers.
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,y: batch_y, keep_prob:1.})
            print("Iter " + str(tr*batch_size) + ", Minibatch Loss= " +"{:.26f}".format(loss) + ", Training Accuracy= " +"{:.26f}".format(acc))
        print("Optimization Finished!")

        # In-sample check on the first batch_size training windows only.
        print("Accuracy in data set")
        test_data = fac[:batch_size].reshape([batch_size,n_steps,n_input])
        test_label = ret[:batch_size].reshape([batch_size,n_classes])
        loss, acc = sess.run([cost, accuracy], feed_dict={x: test_data,y: test_label, keep_prob:1.})
        print("Accuracy= " +"{:.26f}".format(acc))

        # Out-of-sample check on every held-out window.
        print("Accuracy out of data set")
        test_dataT = facT[:len(facT)].reshape([len(facT),n_steps,n_input])
        test_labelT = retT[:len(facT)].reshape([len(facT),n_classes])
        loss, acc = sess.run([cost, accuracy], feed_dict={x: test_dataT,y: test_labelT, keep_prob:1.})
        print("Accuracy= " +"{:.26f}".format(acc))

        # predFAC holds 11 rows in reversed fetch order, so its trailing rows
        # are the ones the data source returned first. The window must be
        # n_steps rows long (not batch_size) and must end on the last row,
        # mirroring the training windows, which end on the row right before
        # the labelled move.
        # NOTE(review): assumes the source returns newest-first -- confirm.
        pred_dataT = predFAC[-n_steps:].reshape([1,n_steps,n_input])
        pred_lable = sess.run([pred],feed_dict={x: pred_dataT, keep_prob:1.})
        list_lable = pred_lable[0][0]
        maxindex = np.argmax(list_lable)
        # Class index 0 is the "positive change" label; everything else prints "down".
        if(maxindex==0):
            print("up")
        else:
            print("down")
        # The with-statement closes the session on exit; no explicit close is needed.

    虽然使用了dropout,但是结果还是存在过拟合的问题

  • 相关阅读:
    FiddlerScript修改特定请求参数下的返回值
    nginx设置反向代理后,页面上的js css文件无法加载
    通过外网访问内网服务器
    linux下使用正确的用户名密码,本地无法连接mysql
    合并重叠时间段C#
    数据库一直显示为单用户,解决办法
    windows下使用tomcat部署网站
    数据库一直还原中,解决办法
    通过mdf ldf文件还原数据库
    知道css有个content属性吗?有什么作用?有什么应用?
  • 原文地址:https://www.cnblogs.com/yunerlalala/p/6341050.html
Copyright © 2020-2023  润新知