• TensorFlow single-machine multi-GPU distributed training (From_server2_gpus)


    # Official tutorial: https://www.tensorflow.org/guide/distributed_training?hl=zh-cn

    import sys
    # import keras
    import numpy as np
    import tensorflow as tf
    import matplotlib.pyplot as plt
    from tensorflow.keras import layers
    import os
    from Model_encode_decode import Transformer2
    import time

    # Make the first gpu_num GPUs visible and enable memory growth so TensorFlow
    # does not grab all device memory up front.
    gpu_num = 8
    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    tf.config.experimental.set_visible_devices(devices=gpus[0:gpu_num], device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    # One MirroredStrategy instance drives all visible GPUs on this machine.
    mirrored_strategy = tf.distribute.MirroredStrategy()

    # Alternative low-level device configuration, kept commented out:
    # physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
    # tf.config.experimental.set_memory_growth(physical_devices[0], True)

    global_num = 0
    global_train_acc = 0
    global_train_acc2 = 0
    global_loss2 = 0


    @tf.function
    def train_step2(inputs, targets):
        def step_fn(inputs, targets):
            inputs = inputs[:, :, :-1]
            tar_real = targets[:, 1:, -1]
            tar_real = tf.cast(tar_real, tf.int32)
            tar_real = tf.one_hot(tar_real, depth=target_size)
            tar_real = tf.squeeze(tar_real, axis=1)
            decode_in = targets[:, -1, 5:7]
            real_zhangfu = targets[:, 1:, 5]
            with tf.GradientTape() as tape:
                pre_class, pre_zhangfu = transformer(inputs, decode_in, training=True)
                loss, mse_loss = loss_fun(tar_real, real_zhangfu, pre_class, pre_zhangfu)
            grads = tape.gradient(loss, transformer.trainable_variables)
            optimizer.apply_gradients(list(zip(grads, transformer.trainable_variables)))
            return loss, mse_loss

        # Run one replica step per GPU, then combine the per-replica losses.
        per_loss, per_mse_loss = mirrored_strategy.run(step_fn, args=(inputs, targets))
        mean_loss = mirrored_strategy.reduce(
            tf.distribute.ReduceOp.SUM, per_loss, axis=None)
        mean_mse_loss = mirrored_strategy.reduce(
            tf.distribute.ReduceOp.SUM, per_mse_loss, axis=None)
        return mean_loss, mean_mse_loss
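
    # Note on the reductions above: loss_fun (defined below) scales the summed
    # per-example loss by 1 / bat, where bat is the *global* batch size. Each
    # replica therefore returns loss_sum_on_replica / global_batch, and
    # ReduceOp.SUM across replicas reconstructs the global-batch mean. This is
    # the loss scaling the tf.distribute guide recommends for custom training
    # loops; ReduceOp.MEAN here would divide by the replica count a second time.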

    def train_loss_compute(inputs, targets):
        inputs = inputs[:, :, :-1]
        # inputs = inputs[:, :, 3][:, :, np.newaxis]
        tar_real = targets[:, 1:, -1]
        tar_real = tf.cast(tar_real, tf.int32)
        tar_real = tf.one_hot(tar_real, depth=target_size)
        tar_real = tf.squeeze(tar_real, axis=1)
        real_zhangfu = targets[:, 1:, 5]
        decode_in = targets[:, -1, 5:7]
        pre_class, pre_zhangfu = transformer(inputs, decode_in, training=False)
        loss, mse_loss = loss_fun(tar_real, real_zhangfu, pre_class, pre_zhangfu)
        acc, one_one_acc, two_two_acc, c3_c3_acc, c4_c4_acc, c5_c5_acc, c6_c6_acc, c7_acc = \
            get_acc(pre_class, tar_real)
        acc2 = get_acc2(pre_zhangfu, real_zhangfu)
        return loss, mse_loss, tf.convert_to_tensor([acc, one_one_acc, two_two_acc, c3_c3_acc,
                                                     c4_c4_acc, c5_c5_acc, c6_c6_acc, c7_acc]), acc2


    @tf.function
    def graph_run(func, args_in):
        train_loss, train_mse_loss, train_acc, train_acc2 = mirrored_strategy.run(func, args=args_in)
        return train_loss, train_mse_loss, train_acc, train_acc2


    def test_loss_compute(inputs, targets):
        inputs = inputs[:, :, :-1]
        tar_real = targets[:, 1:, -1]
        tar_real = tf.cast(tar_real, tf.int32)
        tar_real = tf.one_hot(tar_real, depth=target_size)
        tar_real = tf.squeeze(tar_real, axis=1)
        real_zhangfu = targets[:, 1:, 5]
        decode_in = targets[:, -1, 5:7]
        pre_class, pre_zhangfu = transformer(inputs, decode_in, training=False)
        # loss_fun returns (loss_mix, mse_loss); the original assigned the whole
        # tuple to `loss`, so unpack it here.
        loss, _ = loss_fun(tar_real, real_zhangfu, pre_class, pre_zhangfu)
        acc, one_one_acc, two_two_acc, c3_c3_acc, c4_c4_acc, c5_c5_acc, c6_c6_acc, c7_acc = \
            get_acc(pre_class, tar_real)
        acc2 = get_acc2(pre_zhangfu, real_zhangfu)
        return loss, np.array([acc, one_one_acc, two_two_acc, c3_c3_acc,
                               c4_c4_acc, c5_c5_acc, c6_c6_acc, c7_acc]), acc2


    # loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
    #                                                             reduction='none')
    loss_object = tf.keras.losses.MeanSquaredError()


    def loss_fun(y_true, y_true_zhangfu, y_pred, pre_zhangfu):
        loss_ = tf.losses.categorical_crossentropy(y_true, y_pred)
        loss_zhangfu = tf.losses.MSE(y_true_zhangfu, pre_zhangfu)
        loss_zhangfu = tf.reduce_mean(loss_zhangfu)
        add_weight = 2
        y_true_argmax = tf.argmax(y_true, axis=-1)
        y_pre_argmax = tf.argmax(y_pred, axis=-1)
        # True class is a "down" bucket (4/5/6) but the prediction is a
        # "flat/up" bucket (0-3): weight these errors more heavily.
        preB_tS = ((y_true_argmax == 4) | (y_true_argmax == 5) | (y_true_argmax == 6)) & (
            (y_pre_argmax == 0) | (y_pre_argmax == 1) | (y_pre_argmax == 2) | (y_pre_argmax == 3))
        loss = tf.where(preB_tS, loss_ * add_weight, loss_)
        # Scale by the global batch size (see the note after train_step2).
        loss = tf.reduce_sum(loss) * (1.0 / bat)
        loss_mix = loss + 1 * loss_zhangfu
        return loss_mix, loss_zhangfu
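
    # loss_fun mixes two objectives: categorical cross-entropy over the seven
    # price-movement classes and an MSE term on the predicted price change
    # (zhangfu). add_weight doubles the cross-entropy for the costliest
    # confusion -- a true "down" class (4/5/6) predicted as "flat/up" (0-3) --
    # the same event that get_acc below reports separately as c7_acc.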

    def get_acc(pre, y):
        y = tf.argmax(y, axis=-1)
        pre = tf.argmax(pre, axis=-1)
        # Per-class label counts.
        y_0 = tf.cast(y == 0, tf.int16)
        y_0 = tf.reduce_sum(tf.cast(y_0, tf.float32))
        y_1 = tf.cast(y == 1, tf.int16)
        y_1 = tf.reduce_sum(tf.cast(y_1, tf.float32))
        y_2 = tf.cast(y == 2, tf.int16)
        y_2 = tf.reduce_sum(tf.cast(y_2, tf.float32))
        y_3 = tf.cast(y == 3, tf.int16)
        y_3 = tf.reduce_sum(tf.cast(y_3, tf.float32))
        y_4 = tf.cast(y == 4, tf.int16)
        y_4 = tf.reduce_sum(tf.cast(y_4, tf.float32))
        y_5 = tf.cast(y == 5, tf.int16)
        y_5 = tf.reduce_sum(tf.cast(y_5, tf.float32))
        y_6 = tf.cast(y == 6, tf.int16)
        y_6 = tf.reduce_sum(tf.cast(y_6, tf.float32))
        pre_1 = tf.cast(pre == 1, tf.int16)
        pre_1 = tf.reduce_sum(tf.cast(pre_1, tf.float32))

        # Overall accuracy.
        acc_eq = tf.cast(tf.equal(pre, y), tf.int16)
        acc_float = tf.cast(acc_eq, tf.float32)
        acc = tf.reduce_mean(acc_float)

        # Per-class recall (1e-9 avoids division by zero for empty classes).
        one_one = (y == pre) & (y == 1)
        one_one = tf.cast(one_one, tf.int16)
        one_one_acc = tf.reduce_sum(tf.cast(one_one, tf.float32)) / (y_1 + 1e-9)
        two_two = (pre == y) & (y == 2)
        two_two = tf.cast(two_two, tf.int16)
        two_two_acc = tf.reduce_sum(tf.cast(two_two, tf.float32)) / (y_2 + 1e-9)
        c3_c3 = (pre == y) & (y == 3)
        c3_c3 = tf.cast(c3_c3, tf.int16)
        c3_c3_acc = tf.reduce_sum(tf.cast(c3_c3, tf.float32)) / (y_3 + 1e-9)
        c4_c4 = (pre == y) & (y == 4)
        c4_c4 = tf.cast(c4_c4, tf.int16)
        c4_c4_acc = tf.reduce_sum(tf.cast(c4_c4, tf.float32)) / (y_4 + 1e-9)
        c5_c5 = (pre == y) & (y == 5)
        c5_c5 = tf.cast(c5_c5, tf.int16)
        c5_c5_acc = tf.reduce_sum(tf.cast(c5_c5, tf.float32)) / (y_5 + 1e-9)
        c6_c6 = (pre == y) & (y == 6)
        c6_c6 = tf.cast(c6_c6, tf.int16)
        c6_c6_acc = tf.reduce_sum(tf.cast(c6_c6, tf.float32)) / (y_6 + 1e-9)
        # Fraction of true "down" samples predicted as "flat/up".
        c7 = ((y == 4) | (y == 5) | (y == 6)) & ((pre == 0) | (pre == 1) | (pre == 2) | (pre == 3))
        c7 = tf.cast(c7, tf.int16)
        c7_acc = tf.reduce_mean(tf.cast(c7, tf.float32))
        return (acc, one_one_acc, two_two_acc, c3_c3_acc,
                c4_c4_acc, c5_c5_acc, c6_c6_acc, c7_acc)


    def get_acc2(pre, y):
        # Direction-only accuracy: was the sign of the move predicted correctly?
        y_updown = tf.where(y > 0, 1, 0)
        pre_updown = tf.where(pre > 0, 1, 0)
        acc_eq = tf.cast(tf.equal(pre_updown, y_updown), tf.int16)
        acc_float = tf.cast(acc_eq, tf.float32)
        acc = tf.reduce_mean(acc_float)
        return acc


    def fun_fenbu(x):
        # Print and return the label distribution (fenbu) of x.
        z_0 = tf.cast(x == 0, tf.float32)
        z_0 = tf.reduce_sum(z_0)
        one = tf.cast(x == 1, tf.float32)
        one = tf.reduce_sum(one)
        two = tf.cast(x == 2, tf.float32)
        two = tf.reduce_sum(two)
        three = tf.cast(x == 3, tf.float32)
        three = tf.reduce_sum(three)
        f_4 = tf.cast(x == 4, tf.float32)
        f_4 = tf.reduce_sum(f_4)
        f_5 = tf.cast(x == 5, tf.float32)
        f_5 = tf.reduce_sum(f_5)
        s_6 = tf.cast(x == 6, tf.float32)
        s_6 = tf.reduce_sum(s_6)
        print('Zero:{},\tOne:{},\tTwo:{},\tThree:{},\tFour:{},\tFive:{},\tSix:{}'.format(
            z_0, one, two, three, f_4, f_5, s_6))
        # Class legend:
        #   0: other
        #   1: +2% within 3 days
        #   2: +4% within 3 days
        #   3: +8% within 5 days
        #   4: -2% within 3 days
        #   5: -4% within 3 days
        #   6: -8% within 3 days
        return (z_0, one, two, three, f_4, f_5, s_6)


    if __name__ == '__main__':
        # tf.random.set_seed(1)
        # gu_num = 'sz.002782'  # 可立克
        # gu_num = 'sh.600460'  # 士兰微
        # gu_num = 'sz.002739'  # 万达
        # gu_num = 'sh.600313'  # 农发种业
        # gu_num = 'sh.000001'  # 上证
        qian = 60  # lookback window (days before)
        hou = 1    # prediction horizon (days after)
        # qian = 30
        # hou = 5

        # Hyperparameters
        # EPOCHS = 1
        EPOCHS = 2000000
        Threshold = 3
        num_layers = 3      # num_layers = 1
        d_model = 512       # d_model = 256
        dff = 1024          # dff = 256
        num_heads = 8
        dropout_rate = 0.11
        test_split = -1     # test_split = 40000
        test_index = -1     # test_index = 20000
        target_size = 7     # target_size = 3
        learning_rate = 1e-5
        bat = 700           # bat = 300
        bat = bat * gpu_num  # global batch size: per-GPU batch * number of GPUs
        guiyi_price = False  # guiyi = normalize; guiyi_price = True
        load_flag = True     # load_flag = False

        with mirrored_strategy.scope():
            transformer = Transformer2(num_layers, d_model, num_heads, dff,
                                       target_size, dropout_rate)

        # ========================== Load pretrained weights ==========================
        model_save_path = './save_model'
        if load_flag:
            # load_name = r"data_410000,epoch_70_trainloss_1.38_testloss_0.00"  # pretrained on 410000 samples, drop = 0.1, 80 %
            # load_name = r"data_-1,epoch_80_trainloss_0.77_testloss_0.00"      # pretrained on full data, drop = 0.1
            # load_name = r"data_-1,epoch_7_trainloss_0.53_testloss_3.46"       # pretrained on full data, drop = 0.1
            # load_name = r"drop_0.31,epoch_138_trainloss_1.30_testloss_3.42"   # pretrained on full data, drop = 0.31, 70 %
            load_name = r"drop_0.34,epoch_11_trainloss_1.27_testloss_3.60"      # pretrained on full data, drop = 0.32, 73 %
            temp_path = './decode_model'
            load_path = os.path.join(temp_path, load_name)
            load_path = os.path.join(load_path, load_name)
            transformer.load_weights(load_path)
            print('Loaded pretrained weights')
        # =============================================================================

        # optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9,
        #                                      beta_2=0.999, epsilon=1e-9)
        # optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate, beta_1=0.9,
        #                                       beta_2=0.999, epsilon=1e-09, schedule_decay=0.004)
        # optimizer = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)
        # optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
        optimizer = tf.keras.optimizers.Adamax(learning_rate=learning_rate, beta_1=0.9,
                                               beta_2=0.999, epsilon=1e-09)
        # optimizer = tf.keras.optimizers.Adamax(learning_rate=learning_rate, beta_1=0.99,
        #                                        beta_2=0.999, epsilon=1e-09)

        train_list = []
        test_list = []
        train_acc_list = []
        test_acc_list = []
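
        # The Transformer2 model above is created under mirrored_strategy.scope()
        # so its variables are mirrored across GPUs; the optimizer's slot
        # variables are created on the first apply_gradients call, which also
        # happens under the scope opened around the training loop below. The
        # training dataset is wrapped with experimental_distribute_dataset so
        # each global batch of bat = 700 * gpu_num samples is split across replicas.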
        # Data loading and preprocessing
        # train_x, train_y = concat_db()
        # data_dir = r'C:\股票\股票包\new_dir\data\all_gu'
        # data = np.load(os.path.join(data_dir, 'all_gu.npz'))
        data_dir = './data/all_test'
        data = np.load(os.path.join(data_dir, 'guiyi_False_all_data.npz'))
        train_x, train_y = data['x'][:test_split], data['y'][:test_split]
        db = tf.data.Dataset.from_tensor_slices((train_x, train_y)).shuffle(1000000).batch(bat)
        db = mirrored_strategy.experimental_distribute_dataset(db)
        print()
        print('******************** Train label distribution ********************')
        z_0, one, two, three, f_4, f_5, s_6 = fun_fenbu(train_y[:, -1, -1])

        # =================================== Test ====================================
        val_dir = './data/val_data'
        val_data = np.load(os.path.join(val_dir, 'guiyi_False_val_data.npz'))
        mix_part_x, mix_part_y = val_data['x'][:test_index], val_data['y'][:test_index]
        # mix_part_x, mix_part_y = get_data(gu_num)
        db_test = tf.data.Dataset.from_tensor_slices((mix_part_x, mix_part_y)).batch(bat)
        print('******************** Test label distribution ********************')
        z_0, one, two, three, f_4, f_5, s_6 = fun_fenbu(mix_part_y[:, -1, -1])

        with mirrored_strategy.scope():
            for epoch in range(EPOCHS):
                start_time = time.time()
                for batch, (inputs, targets) in enumerate(db):
                    train_step2(inputs, targets)

                    if batch % 100 == 0:
                        train_loss, train_mse_loss, train_acc, train_acc2 = graph_run(
                            train_loss_compute, args_in=(inputs, targets))
                        mean_loss = mirrored_strategy.reduce(
                            tf.distribute.ReduceOp.SUM, train_loss, axis=None)
                        mean_mse_loss = mirrored_strategy.reduce(
                            tf.distribute.ReduceOp.SUM, train_mse_loss, axis=None)
                        mean_acc = mirrored_strategy.reduce(
                            tf.distribute.ReduceOp.MEAN, train_acc, axis=None)
                        mean_acc2 = mirrored_strategy.reduce(
                            tf.distribute.ReduceOp.MEAN, train_acc2, axis=None)
                        end_time = time.time()
                        print('epoch:{},batch:{}'.format(epoch, batch))
                        print('train_loss:{},\tMSE_loss:{}'.format(mean_loss.numpy(), mean_mse_loss.numpy()))
                        print('ACC column order: acc,\t acc_1,\t acc_2,\t acc_3,\t acc_4,\t acc_5,\t acc_6,\t acc_7')
                        print('train_acc:{}'.format(mean_acc.numpy().round(3)))
                        print('zhangfu direction acc, train:{:.2f}'.format(float(mean_acc2)))
                        print('time:{:.3f}'.format(end_time - start_time))
                        start_time = time.time()

                    # Evaluate on the test set
                    if batch % 500 == 0:
                        test_loss_all = 0
                        num = 0
                        acc = np.array([0, 0, 0, 0, 0, 0, 0, 0], dtype=float)  # np.float was removed in NumPy >= 1.24
                        acc2 = 0
                        for batch_test, (test_inputs, test_targets) in enumerate(db_test):
                            num += 1
                            test_loss, test_acc, test_acc2 = test_loss_compute(test_inputs, test_targets)
                            test_loss = tf.reduce_mean(test_loss).numpy()
                            test_loss_all += test_loss
                            acc += test_acc
                            acc2 += test_acc2
                        test_loss_all = test_loss_all / (num + 1e-9)
                        acc_all = acc / (num + 1e-9)
                        acc_all2 = acc2 / (num + 1e-9)
                        print('test_loss_all :', test_loss_all)
                        print('test_acc :{}'.format(acc_all.round(3)))
                        # Note: global_train_acc2 is never updated in this script, so it prints 0.
                        print('zhangfu direction acc, train:{:.2f},test:{:.2f}'.format(
                            global_train_acc2, float(acc_all2)))

                        if (epoch % 1 == 0) and (batch != 0):
                            print('Saving model')
                            temp_model_name = 'drop_{},epoch_{}_trainloss_{:.2f}_testloss_{:.2f}'.format(
                                dropout_rate, epoch, mean_loss.numpy(), test_loss_all)
                            if not os.path.exists('./decode_model'):
                                os.mkdir('./decode_model')
                            temp_model_save = os.path.join('./decode_model', temp_model_name)
                            if not os.path.exists(temp_model_save):
                                os.mkdir(temp_model_save)
                            transformer.save_weights(os.path.join(temp_model_save, temp_model_name))
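
For reference, the script above is one instance of the standard tf.distribute custom-training-loop pattern: build the model and optimizer under strategy.scope(), distribute the dataset, run a per-replica step with strategy.run(), and combine per-replica losses with strategy.reduce(). Below is a minimal, self-contained sketch of just that pattern on toy data; every name in it (GLOBAL_BATCH, the Dense toy model, and so on) is illustrative and not part of the original script.

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()
    GLOBAL_BATCH = 64 * strategy.num_replicas_in_sync

    # Toy regression data, batched to the *global* batch size.
    x = tf.random.normal((1024, 8))
    y = tf.random.normal((1024, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(1024).batch(GLOBAL_BATCH)
    dist_dataset = strategy.experimental_distribute_dataset(dataset)

    # Variables (model weights, optimizer slots) must be created under the scope.
    with strategy.scope():
        model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
        optimizer = tf.keras.optimizers.Adam(1e-3)

    @tf.function
    def train_step(inputs, targets):
        def step_fn(inputs, targets):
            with tf.GradientTape() as tape:
                pred = model(inputs, training=True)
                # Scale by the global batch size so ReduceOp.SUM across
                # replicas yields the global-batch mean loss.
                loss = tf.reduce_sum(tf.square(pred - targets)) / GLOBAL_BATCH
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            return loss

        per_replica_loss = strategy.run(step_fn, args=(inputs, targets))
        return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)

    for step, (inputs, targets) in enumerate(dist_dataset):
        loss = train_step(inputs, targets)
        if step % 10 == 0:
            print('step {}: loss {:.4f}'.format(step, float(loss)))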
  • Original source: https://www.cnblogs.com/cxhzy/p/16008176.html