# 官方教程 https://www.tensorflow.org/guide/distributed_training?hl=zh-cn
import sys
# import keras
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import os
import matplotlib.pyplot as plt
from Model_encode_decode import Transformer2
import time
# Device configuration: expose at most `gpu_num` GPUs to TensorFlow and let
# each GPU allocate memory on demand.
gpu_num = 8
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_visible_devices(gpus[:gpu_num], 'GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Strategy used for every distributed run/reduce call in this file.
mirrored_strategy = tf.distribute.MirroredStrategy()

# Module-level counters, all starting at zero.
global_num = global_train_acc = global_train_acc2 = global_loss2 = 0
@tf.function
def train_step2(inputs, targets):
    """Run one distributed training step and return the reduced losses.

    The step is executed on every replica through ``mirrored_strategy.run``
    and the per-replica results are combined with a SUM reduction (the
    classification part of the loss is already divided by the global batch
    size ``bat`` inside ``loss_fun``).

    Args:
        inputs: batch of input sequences; the last feature column is dropped
            before the model is called.
        targets: batch of target sequences; class labels are read from the
            last feature, regression targets from feature index 5.

    Returns:
        Tuple ``(loss, mse_loss)`` summed over replicas.
    """

    def step_fn(inputs, targets):
        # Per-replica work: slice the batch, run the model, apply gradients.
        encoder_in = inputs[:, :, :-1]
        labels = tf.cast(targets[:, 1:, -1], tf.int32)
        # squeeze(axis=1) requires exactly one step to remain after the 1: slice.
        labels = tf.squeeze(tf.one_hot(labels, depth=target_size), axis=1)
        decode_in = targets[:, -1, 5:7]
        real_zhangfu = targets[:, 1:, 5]

        with tf.GradientTape() as tape:
            pre_class, pre_zhangfu = transformer(encoder_in, decode_in, training=True)
            loss, mse_loss = loss_fun(labels, real_zhangfu, pre_class, pre_zhangfu)

        weights = transformer.trainable_variables
        grads = tape.gradient(loss, weights)
        optimizer.apply_gradients(list(zip(grads, weights)))
        return loss, mse_loss

    replica_loss, replica_mse = mirrored_strategy.run(step_fn, args=(inputs, targets))
    sum_op = tf.distribute.ReduceOp.SUM
    total_loss = mirrored_strategy.reduce(sum_op, replica_loss, axis=None)
    total_mse = mirrored_strategy.reduce(sum_op, replica_mse, axis=None)
    return total_loss, total_mse
def train_loss_compute(inputs, targets):
    """Compute losses and accuracy metrics for one batch without training.

    The model is called with ``training=False``; no gradients are applied.

    Args:
        inputs: batch of input sequences; the last feature column is dropped.
        targets: batch of target sequences (labels in the last feature,
            regression target in feature index 5).

    Returns:
        ``(loss, mse_loss, class_metrics, acc2)`` where ``class_metrics`` is
        a tensor holding the eight values returned by ``get_acc`` and
        ``acc2`` is the value returned by ``get_acc2``.
    """
    encoder_in = inputs[:, :, :-1]
    labels = tf.one_hot(tf.cast(targets[:, 1:, -1], tf.int32), depth=target_size)
    labels = tf.squeeze(labels, axis=1)
    real_zhangfu = targets[:, 1:, 5]
    decode_in = targets[:, -1, 5:7]

    pre_class, pre_zhangfu = transformer(encoder_in, decode_in, training=False)
    loss, mse_loss = loss_fun(labels, real_zhangfu, pre_class, pre_zhangfu)

    class_metrics = tf.convert_to_tensor(list(get_acc(pre_class, labels)))
    acc2 = get_acc2(pre_zhangfu, real_zhangfu)
    return loss, mse_loss, class_metrics, acc2
@tf.function
def graph_run(func, args_in):
    """Run ``func`` on every replica inside a compiled graph.

    Args:
        func: callable returning four values (loss, MSE loss, accuracy
            vector, second accuracy).
        args_in: tuple of positional arguments forwarded to ``func``.

    Returns:
        The four per-replica results produced by ``mirrored_strategy.run``.
    """
    loss, mse_loss, acc, acc2 = mirrored_strategy.run(func, args=args_in)
    return loss, mse_loss, acc, acc2
def test_loss_compute(inputs, targets):
    """Compute the loss and accuracy metrics for one evaluation batch.

    The model is called with ``training=False``. The metrics are packed into
    a NumPy array, so this function is meant to be called eagerly (it is not
    wrapped in ``tf.function`` in this file).

    Args:
        inputs: batch of input sequences; the last feature column is dropped.
        targets: batch of target sequences (labels in the last feature,
            regression target in feature index 5).

    Returns:
        ``(loss, class_metrics, acc2)`` where ``loss`` is the mixed loss from
        ``loss_fun``, ``class_metrics`` is a NumPy array of the eight values
        returned by ``get_acc`` and ``acc2`` is the value from ``get_acc2``.
    """
    inputs = inputs[:, :, :-1]
    tar_real = targets[:, 1:, -1]
    tar_real = tf.cast(tar_real, tf.int32)
    tar_real = tf.one_hot(tar_real, depth=target_size)
    # squeeze(axis=1) requires exactly one step to remain after the 1: slice.
    tar_real = tf.squeeze(tar_real, axis=1)
    real_zhangfu = targets[:, 1:, 5]
    decode_in = targets[:, -1, 5:7]
    pre_class, pre_zhangfu = transformer(inputs, decode_in, training=False)
    # loss_fun returns (mixed_loss, mse_loss). Keep only the mixed loss so the
    # caller does not average the two terms together when it reduces `loss`.
    loss, _ = loss_fun(tar_real, real_zhangfu, pre_class, pre_zhangfu)
    class_metrics = get_acc(pre_class, tar_real)
    acc2 = get_acc2(pre_zhangfu, real_zhangfu)
    return loss, np.array(list(class_metrics)), acc2
# loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
# reduction='none')
# Keras mean-squared-error loss object.
# NOTE(review): nothing in the visible part of this file references
# `loss_object` (loss_fun calls tf.losses.MSE directly) - confirm it is still needed.
loss_object = tf.keras.losses.MeanSquaredError()
def loss_fun(y_true, y_true_zhangfu, y_pred, pre_zhangfu):
    """Combine a weighted classification loss with an MSE regression loss.

    The per-sample categorical cross-entropy is doubled for samples whose
    true class is 4, 5 or 6 while the predicted class is 0, 1, 2 or 3. The
    weighted values are summed and divided by the global batch size ``bat``
    (a module-level name), then the mean MSE is added with weight 1.

    Args:
        y_true: one-hot class labels.
        y_true_zhangfu: regression targets.
        y_pred: predicted class scores, compared against ``y_true``.
        pre_zhangfu: predicted regression values.

    Returns:
        Tuple ``(mixed_loss, mse_loss)``.
    """
    penalty = 2      # multiplier for the penalised (true 4-6, predicted 0-3) samples
    mse_weight = 1   # weight of the regression term in the mixed loss

    ce_per_sample = tf.losses.categorical_crossentropy(y_true, y_pred)
    mse = tf.reduce_mean(tf.losses.MSE(y_true_zhangfu, pre_zhangfu))

    true_cls = tf.argmax(y_true, axis=-1)
    pred_cls = tf.argmax(y_pred, axis=-1)
    true_in_456 = (true_cls == 4) | (true_cls == 5) | (true_cls == 6)
    pred_in_0123 = (pred_cls == 0) | (pred_cls == 1) | (pred_cls == 2) | (pred_cls == 3)

    weighted_ce = tf.where(true_in_456 & pred_in_0123, ce_per_sample * penalty, ce_per_sample)
    # Sum and divide by the global batch size rather than taking the mean.
    class_loss = tf.reduce_sum(weighted_ce) * (1.0 / bat)
    return class_loss + mse_weight * mse, mse
def get_acc(pre, y):
    """Compute overall accuracy, per-class recall and a cross-group error rate.

    Both arguments are reduced with ``argmax`` over the last axis before
    comparison.

    Args:
        pre: predicted class scores.
        y: one-hot class labels.

    Returns:
        Tuple of eight scalar tensors, in this order:
        overall accuracy; recall for classes 1, 2, 3, 4, 5 and 6 (correct
        predictions of the class divided by the number of samples whose true
        class it is, with 1e-9 added to the denominator); and the fraction of
        all samples whose true class is 4, 5 or 6 but whose predicted class
        is 0, 1, 2 or 3.
    """
    y = tf.argmax(y, axis=-1)
    pre = tf.argmax(pre, axis=-1)
    correct = tf.equal(pre, y)

    def _class_recall(cls):
        # Share of samples with true class `cls` that were predicted as `cls`.
        is_cls = y == cls
        hits = tf.reduce_sum(tf.cast(correct & is_cls, tf.float32))
        support = tf.reduce_sum(tf.cast(is_cls, tf.float32))
        return hits / (support + 1e-9)  # epsilon avoids division by zero

    acc = tf.reduce_mean(tf.cast(correct, tf.float32))
    recalls = [_class_recall(cls) for cls in range(1, 7)]

    # Despite the "_acc" name this is an error rate over the whole batch.
    c7 = ((y == 4) | (y == 5) | (y == 6)) & ((pre == 0) | (pre == 1) | (pre == 2) | (pre == 3))
    c7_acc = tf.reduce_mean(tf.cast(c7, tf.float32))

    return (acc, *recalls, c7_acc)
def get_acc2(pre, y):
    """Return the fraction of elements where ``pre`` and ``y`` agree on being > 0.

    Args:
        pre: predicted values.
        y: reference values.

    Returns:
        Scalar float32 tensor: mean of the element-wise agreement between
        ``pre > 0`` and ``y > 0``.
    """
    same_side = tf.equal(pre > 0, y > 0)
    return tf.reduce_mean(tf.cast(same_side, tf.float32))
def fun_fenbu(x):
    """Count, print and return how many entries of ``x`` equal each of 0..6.

    Args:
        x: array or tensor of class ids.

    Returns:
        Tuple of seven float32 scalar tensors: the counts for values 0 to 6.
    """
    counts = tuple(
        tf.reduce_sum(tf.cast(x == cls, tf.float32)) for cls in range(7)
    )
    print(
        'Zero:{},\tOne:{},\tTwo:{},\tThree:{},\tFour:{},\tFIVE:{},\tSix:{}'.format(*counts))
    # Legend from the original author, presumably for classes 0..6 in order
    # (TODO confirm): other; +2% in 3 days; +4% in 3 days; +8% in 5 days;
    # -2% in 3 days; -4% in 3 days; -8% in 3 days.
    return counts
if __name__ == '__main__':
    # Script entry point: build the model, load the data, then train while
    # periodically reporting metrics, evaluating and saving weights.
    # Everything here is deliberately assigned at module level: train_step2,
    # train_loss_compute, test_loss_compute and loss_fun read `target_size`,
    # `bat`, `transformer` and `optimizer` as globals.

    # tf.random.set_seed(1)
    # gu_num = 'sz.002782'  # Click (Kelike)
    # gu_num = 'sh.600460'  # Silan Micro
    # gu_num = 'sz.002739'  # Wanda
    # gu_num = 'sh.600313'  # Nongfa Seed Industry
    # gu_num = 'sh.000001'  # SSE Composite Index
    qian = 60
    hou = 1
    # qian = 30
    # hou = 5
    # split_test = -1
    # split_test = 10

    # Hyperparameters.
    # EPOCHS = 1
    EPOCHS = 2000000
    Threshold = 3
    num_layers = 3
    # num_layers = 1
    d_model = 512
    # d_model = 256
    dff = 1024
    # dff = 256
    num_heads = 8
    dropout_rate = 0.11
    # NOTE(review): test_split / test_index are used as slice ends below, so
    # -1 drops the final sample rather than meaning "everything" - confirm.
    test_split = -1
    # test_split = 40000
    # target_size = 3
    # test_index = 20000
    test_index = -1
    target_size = 7
    # reslut_seq = 60
    learing_rate = 1e-5
    # bat = 300
    bat = 700
    # `bat` is the batch size given to Dataset.batch before the dataset is
    # distributed, and the divisor used inside loss_fun.
    bat = bat * gpu_num
    # guiyi_price = True
    guiyi_price = False
    # load_flag = False
    load_flag = True

    # Create the model and the optimizer under the strategy scope so their
    # variables are created as distributed variables.
    with mirrored_strategy.scope():
        transformer = Transformer2(num_layers, d_model, num_heads, dff, target_size, dropout_rate)
        # Previously tried optimizers:
        # optimizer = tf.keras.optimizers.Adam(learing_rate, beta_1=0.9,
        #                                      beta_2=0.999, epsilon=1e-9)
        # optimizer = tf.keras.optimizers.Adadelta(lr=learing_rate)
        # optimizer = tf.keras.optimizers.SGD(lr=learing_rate, momentum=0.9)
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer = tf.keras.optimizers.Adamax(learning_rate=learing_rate, beta_1=0.9, beta_2=0.999,
                                               epsilon=1e-09)

    # ------------------------- load pretrained weights -------------------------
    model_save_path = './save_model'
    if load_flag:
        # Earlier checkpoints (notes kept from the original author):
        # load_name = r"data_410000,epoch_70_trainloss_1.38_testloss_0.00"  # 410000 pretrained model, drop = 0.1, 80 %
        # load_name = r"data_-1,epoch_80_trainloss_0.77_testloss_0.00"  # -1 pretrained model, drop = 0.1
        # load_name = r"data_-1,epoch_7_trainloss_0.53_testloss_3.46"  # -1 pretrained model, drop = 0.1
        # load_name = r"drop_0.31,epoch_138_trainloss_1.30_testloss_3.42"  # -1 pretrained model, drop = 0.31, 70 %
        load_name = r"drop_0.34,epoch_11_trainloss_1.27_testloss_3.60"  # -1 pretrained model, drop = 0.32, 73 %
        temp_path = './decode_model'
        # Weights live at <temp_path>/<load_name>/<load_name>, mirroring how
        # they are saved at the bottom of this script.
        load_path = os.path.join(temp_path, load_name, load_name)
        transformer.load_weights(load_path)
        print('加载模型')

    train_list = []
    test_list = []
    train_acc_list = []
    test_acc_list = []

    # ------------------------------ training data ------------------------------
    # data_dir = r'C:\股票\股票包\new_dir\data\all_gu'
    # data = np.load(os.path.join(data_dir,'all_gu.npz'))
    data_dir = './data/all_test'
    data = np.load(os.path.join(data_dir, 'guiyi_False_all_data.npz'))
    train_x, train_y = data['x'][:test_split], data['y'][:test_split]
    db = tf.data.Dataset.from_tensor_slices((train_x, train_y)).shuffle(1000000).batch(bat)
    db = mirrored_strategy.experimental_distribute_dataset(db)
    print()
    print('******************** Train数据分布 ********************')
    z_0, one, two, three, f_4, f_5, s_6 = fun_fenbu(train_y[:, -1, -1])

    # ----------------------------- validation data -----------------------------
    val_dir = './data/val_data'
    val_data = np.load(os.path.join(val_dir, 'guiyi_False_val_data.npz'))
    mix_part_x, mix_part_y = val_data['x'][:test_index], val_data['y'][:test_index]
    # The validation dataset is not distributed; it is evaluated eagerly.
    db_test = tf.data.Dataset.from_tensor_slices((mix_part_x, mix_part_y)).batch(bat)
    print('******************** TEST数据分布 ********************')
    z_0, one, two, three, f_4, f_5, s_6 = fun_fenbu(mix_part_y[:, -1, -1])

    sum_op = tf.distribute.ReduceOp.SUM
    mean_op = tf.distribute.ReduceOp.MEAN

    with mirrored_strategy.scope():
        for epoch in range(EPOCHS):
            start_time = time.time()
            for batch, (inputs, targets) in enumerate(db):
                train_step2(inputs, targets)

                # Every 100 batches: report training metrics on the current batch.
                if batch % 100 == 0:
                    train_loss, train_mse_loss, train_acc, train_acc2 = graph_run(
                        train_loss_compute, args_in=(inputs, targets))
                    # Losses are summed across replicas, accuracies are averaged.
                    mean_loss = mirrored_strategy.reduce(sum_op, train_loss, axis=None)
                    mean_mse_loss = mirrored_strategy.reduce(sum_op, train_mse_loss, axis=None)
                    mean_acc = mirrored_strategy.reduce(mean_op, train_acc, axis=None)
                    mean_acc2 = mirrored_strategy.reduce(mean_op, train_acc2, axis=None)
                    end_time = time.time()
                    print('epoch:{},batch:{}'.format(epoch, batch))
                    print('train_loss:{},\tMSE_loss:{}'.format(mean_loss.numpy(), mean_mse_loss.numpy()))
                    print('ACC标签顺序: acc,\t acc_1,\t acc_2,\t acc_3,\t acc_4,\t acc_5,\t acc_6,\t acc_7')
                    print('train_acc:{}'.format(mean_acc.numpy().round(3)))
                    print('涨幅预测正确train:{:.2f}'.format(mean_acc2))
                    print('time:{:.3f}'.format(end_time - start_time))
                    start_time = time.time()

                # Every 500 batches: evaluate on the validation set. 500 is a
                # multiple of 100, so mean_loss / mean_acc2 from the block
                # above are always defined here.
                if batch % 500 == 0:
                    test_loss_all = 0
                    num = 0
                    # np.float was removed from NumPy; use an explicit dtype.
                    acc = np.zeros(8, dtype=np.float64)
                    acc2 = 0
                    for batch_test, (test_inputs, test_targets) in enumerate(db_test):
                        num += 1
                        test_loss, test_acc, test_acc2 = test_loss_compute(test_inputs, test_targets)
                        test_loss = tf.reduce_mean(test_loss).numpy()
                        test_loss_all += test_loss
                        acc += test_acc
                        acc2 += test_acc2
                    # Average over validation batches; epsilon guards an empty set.
                    test_loss_all = test_loss_all / (num + 1e-9)
                    acc_all = acc / (num + 1e-9)
                    acc_all2 = acc2 / (num + 1e-9)
                    print('test_loss_all :', test_loss_all)
                    print('test_acc :{}'.format(acc_all.round(3)))
                    # Report the latest training value rather than the
                    # never-updated global_train_acc2 counter (always 0).
                    print('涨幅预测正确train:{:.2f},test:{:.2f}'.format(mean_acc2, acc_all2))

                    # Save weights after each evaluation except the one at batch 0.
                    if (epoch % 1 == 0) and (batch != 0):
                        print('保存模型')
                        temp_model_name = 'drop_{},epoch_{}_trainloss_{:.2f}_testloss_{:.2f}'.format(
                            dropout_rate, epoch, mean_loss.numpy(), test_loss_all)
                        temp_model_save = os.path.join('./decode_model', temp_model_name)
                        # Creates ./decode_model as well when it is missing.
                        os.makedirs(temp_model_save, exist_ok=True)
                        transformer.save_weights(os.path.join(temp_model_save, temp_model_name))