机器学习实战_3_03_IBM股价预测

一、实战

print("###############################step1: 导入库###########################################")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, Conv2D
from keras.optimizers import SGD

print("###############################step2: 加载历史数据文件###########################################")
# Load the price history with the 'Date' column parsed as datetimes and used
# as the index, so that rows can be sliced with year strings below.
dataset = pd.read_csv(
    "datasets_8388_11883_IBM_2006-01-01_to_2018-01-01.csv",
    index_col='Date',
    parse_dates=['Date'],
)
pd.set_option('display.width', 1000)  # widen console output so a table row is not wrapped
print(dataset.head())
print("dataset.shape:  ", dataset.shape)
print(dataset.describe())

print("**************************************xxxxxxxxxxxx*****************************************")
# Chronological split: everything up to and including 2016 is training data,
# 2017 onward is test data. The 'High' column is selected by name (not by
# position) so these arrays hold the same series that the plotting code and
# the test-window construction read via dataset['High'].
# The double brackets keep the result two-dimensional: shape (n_rows, 1).
train_set = dataset.loc[:'2016', ['High']].values
test_set = dataset.loc['2017':, ['High']].values
print(train_set.shape)
print(test_set.shape)

print("###############################step3: 定义显示函数###########################################")


def plot_predictions(test_result, predict_restult):
    """Plot the true and the predicted price series on one chart and show it.

    Args:
        test_result: true values; drawn in red.
        predict_restult: predicted values; drawn in blue.
    """
    # Open a dedicated figure so the two curves are not drawn onto whatever
    # axes pyplot currently has active (e.g. an earlier plot not yet shown).
    plt.figure()
    plt.plot(test_result, color='red', label='IBM True Stock Price')
    plt.plot(predict_restult, color='blue', label="IBM Predicted Stock Price")
    plt.title("IBM Stock Price")
    plt.xlabel("Time")
    plt.ylabel("Stock Price")
    plt.legend()  # render the legend from the labels given above
    plt.show()

print("###############################step4: 绘制训练集和测试集的数据###########################################")
# Draw the training span and the test span of the 'High' series on the same
# axes; the two legend entries follow the order of the two plot calls.
dataset['High'][:"2016"].plot(figsize=(16, 4), legend=True)
dataset['High']["2017":].plot(figsize=(16, 4), legend=True)
plt.title("IBM Stock Price")
plt.legend(['Train set (through 2016)', 'Test set (2017 onward)'])
# NOTE: plt.show() is not called here, so this figure stays pending until the
# next plt.show() call in the script.

# Min-max normalisation: map the training values into the range [0, 1].
# feature_range is documented as a tuple; a list is rejected by scikit-learn
# versions that validate constructor parameters.
sc = MinMaxScaler(feature_range=(0, 1))
train_set_scaled = sc.fit_transform(train_set)

print("**************************************创建序列数据集（训练和测试）*****************************************")
# Sliding-window samples: each input is `time_steps` consecutive scaled
# prices and its target is the scaled price that immediately follows.
time_steps = 60
X_train = []
y_train = []
# Iterate up to the actual number of training rows instead of a hard-coded
# count, so the loop stays correct if the data file or the split changes.
for i in range(time_steps, len(train_set_scaled)):
    X_train.append(train_set_scaled[i - time_steps:i, 0])  # column 0 is the only feature
    y_train.append(train_set_scaled[i, 0])

X_train, y_train = np.array(X_train), np.array(y_train)  # lists -> numpy arrays
print(X_train.shape)
print(X_train[0])

# An LSTM layer expects input shaped (samples, sequence_length, features):
# (n_samples, time_steps) ---> (n_samples, time_steps, 1)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print(X_train.shape)
print((X_train.shape[1], 1))

print("###############################step5: 搭建LSTM模型，进行训练和预测###########################################")
# Sequential takes a *list* of layers. The final Dense(1) layer reduces the
# 128 LSTM units to the single price value regressed against y_train; without
# it the model would emit 128 values per sample.
model = tf.keras.models.Sequential([
    # input_shape is only needed on the first layer. It has two elements:
    # the sequence length of one sample and the number of values per time
    # step. E.g. sequences of length 10 whose steps carry two values each
    # would use input_shape=(10, 2).
    tf.keras.layers.LSTM(128, input_shape=(X_train.shape[1], 1)),
    # To stack further LSTM layers, every LSTM except the last one must be
    # created with return_sequences=True so that it passes a full sequence on.
    tf.keras.layers.Dense(units=1),
])

# Compile the model: RMSprop optimiser, mean-squared-error loss.
model.compile(optimizer='rmsprop', loss='mse')

# Train the model.
model.fit(X_train, y_train, epochs=20, batch_size=32)
print("**************************************打印神经网络结构，统计参数数目*****************************************")
# Print the network structure together with its parameter counts.
model.summary()
print("**************************************构建数据集进行预测*****************************************")
# Re-join the training span and the test span of the 'High' series into one
# continuous series.
dataset_total = pd.concat((dataset['High'][:"2016"], dataset['High']["2017":]), axis=0)
print(dataset_total.shape)
print(dataset_total)

# Rows after the training span, as a single-column 2-D array.
inputs = dataset_total[len(train_set):].values
inputs = inputs.reshape(-1, 1)
print(inputs.shape)
# Only transform here: calling fit_transform would refit `sc` on these rows
# and replace the training-set min/max that every later sc.transform /
# sc.inverse_transform call relies on.
inputs_scaled = sc.transform(inputs)

print("**************************************构建测试集X_test,进行估价预测*****************************************")
dataset_total = pd.concat((dataset['High'][:"2016"], dataset['High']["2017":]), axis=0)

# Window length used for the training samples (second axis of X_train);
# the test windows must have the same length.
window = X_train.shape[1]

# Take the test rows plus the `window` rows preceding them, so that the very
# first test day already has a complete history window.
inputs = dataset_total[len(dataset_total) - len(test_set) - window:].values
print(inputs)

# Scale with the already-fitted scaler.
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
print(inputs.shape)

# Build one window per test day. The upper bound comes from the actual
# number of rows instead of a hard-coded count.
X_test = np.array([inputs[i - window:i, 0] for i in range(window, len(inputs))])
print(X_test.shape)

# (samples, window) ---> (samples, window, 1), the layout the model was trained on.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
print(X_test.shape)

predict_test = model.predict(X_test)  # predictions in the scaled range
print(predict_test.shape)
predict_stock_price = sc.inverse_transform(predict_test)  # back to price units
print(predict_stock_price)
print("**************************************绘制测试结果和预测结果*****************************************")
# Plot the true test prices against the predicted prices.
plot_predictions(test_set, predict_stock_price)

说明：

1.本文为个人学习笔记；

2.学习视频来源：https://space.bilibili.com/474347248/channel/detail?cid=143235

3.数据来源：唐国梁Tommy，为了方便志同道合的伙伴一起学习，我将数据上传到个人盘分享：

链接：https://pan.baidu.com/s/1t0SiTHtcn8BhgM92jYjArA
提取码：g836

4.本文代码运行环境基于pycharm.（原代码是基于jupyter实现的）；

5.代码的一些注释是为了理解，不太标准规范化，但不影响功能实现；

6.欢迎一起讨论学习：386825951@qq.com

相关阅读:
S5PV210 NAND Flash
S5PV210串口
 S5PV210初始化系统时钟
 每日英语：Dishing the Dirt on Hand-Washing Guidelines
每日英语：Vender Assault Shines Ugly Light on China's Urban Enforcers
每日英语：The Perils Of Giving Advice
每日英语：China Targets Big Pharma
每日英语：Asia Has World's Biggest Pay Gap, Study Finds
每日英语：Now on Taobao: Outsourced Care for Grandma
每日英语：The Upside of Favoritism
原文地址：https://www.cnblogs.com/bltstop/p/14816933.html