# Transformer+CNN+Smote

import tensorflow as tf
import time
import numpy as np
import matplotlib.pyplot as plt
import sys
from tensorflow import keras
import os
from tensorflow import nn
import math
import random
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.callbacks import EarlyStopping

#   Low-level runtime configuration: require at least one visible GPU and
#   enable memory growth on the first one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(physical_devices[0], True)

#   Dataset archive to load; the commented-out lines are alternative files.
# path = '../8000_np_img.npz'
path = '../5000_np_img.npz'
# path = '../2000_np_img.npz'
# path = '../500_np_img.npz'
def split_data(x_data,y_data,amount):
    '''
    Randomly split paired arrays into a leading and a trailing part.

    :param x_data: array indexed by sample along axis 0
    :param y_data: array with the same number of samples as x_data
    :param amount: fraction of the samples that goes into the first (train) part
    :return: (train_x, train_y, test_x, test_y), rows kept aligned between x and y
    '''
    sample_count = x_data.shape[0]
    # One shared random order keeps x and y rows paired after the shuffle.
    indices = list(np.random.permutation(sample_count))
    print('indices:',indices,type(indices))
    cut = int(sample_count*amount)
    head, tail = indices[:cut], indices[cut:]
    return x_data[head], y_data[head], x_data[tail], y_data[tail]

data = np.load(path)
#   Number of trailing time steps kept from every sequence.
timelength = 15
#   Archive keys (original author's note): Img_data=result, Samid_data=result_id, Rain_data=result_rain
train_imgs = data['Img_data'].astype(np.float32)         #   original note: imgs.shape (500, 15, 4, 101, 101)
# train_imgs = np.reshape(train_imgs[:,14,:,:,:],(-1,4,101,101))
#   Keep only the last `timelength` frames of each sequence.
train_imgs = np.reshape(train_imgs[:,15-timelength:,:,:,:],(-1,timelength,4,101,101))
print('train_imgs.shape:',train_imgs.shape)

#   Divide by 255 (original author's note: normalizes the images to [0, 1]).
train_imgs = train_imgs / 255.
train_rain = data['Rain_data'].astype(np.float32)      #   original note: rain.shape (5000, 1)
train_rain = np.reshape(train_rain,(-1,1))

# train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.8)
train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.9999)

#   Rain class per sample: 0 when rain < 5, 2 when rain > 15, otherwise 1.
type_train = np.where(train_rain > 15,2,1)
type_train = np.where(train_rain < 5 ,0,type_train)

#   Fraction of training samples that falls into each rain class.
num_dict = {}
for i in type_train:
    raintype = i[0]
    # Start from 0 and always add 1, so the first occurrence is counted too
    # (it used to be stored as 0, under-counting every class by one).
    num_dict[raintype] = num_dict.get(raintype, 0) + 1

for key,value in num_dict.items():
    value = value / train_imgs.shape[0]
    num_dict[key] = value
    print('key:{},value:{}'.format(key,value))
print('num_dict[0]:',num_dict[0])

#   One-hot encoding of the rain class, flattened to (samples, 3).
onehot_train = tf.one_hot(type_train,depth=3)
onehot_train = tf.reshape(onehot_train,(-1,3))
#   Split a radar image stack into one image sequence per height level.
def get_high_img(or_img):
    '''
    Return a dict that maps 'high0', 'high1', ... to the images of one height level.

    The number of levels and the image size are taken from the input itself
    instead of being hard-coded.

    :param or_img: array of shape (batch, time, levels, H, W)
    :return: dict; the value for 'high{i}' is or_img[:, :, i, :, :], shape (batch, time, H, W)
    '''
    high_dic = {}
    for i in range(or_img.shape[2]):
        high_key = 'high{}'.format(i)
        high_dic[high_key] = or_img[:, :, i, :, :]
        print('high_key:',high_key)
    return high_dic

#   Height level used for training and testing. The variables below keep the
#   name "high0" but hold whichever level `which_high` selects.
which_high = 'high3'
train_high_dic = get_high_img(train_imgs)
train_high0_img = train_high_dic[which_high]

print('train_high0_img.shape:',train_high0_img.shape)
print('high0_img max:{},min:{}'.format(np.max(train_high0_img),np.min(train_high0_img)))

#   Same height level for the held-out split.
test_high_dic = get_high_img(test_imgs)
test_high0_img = test_high_dic[which_high]

print('train_high0_img:',train_high0_img[0].shape)

def pure_smote(X,Y,K,Max_add=400,threshold=30):
    '''
    Oversample strong-rain samples by SMOTE-style linear interpolation.

    Samples whose label is >= `threshold` form the strong-rain set. Each new
    sample is cur + alpha * (neighbour - cur) with alpha drawn uniformly from
    [0, 1]; its label is interpolated with the same alpha. The strong-rain
    samples are used as `cur` in round-robin order.

    :param X: array of shape (n, ...); each sample is flattened for the neighbour search
    :param Y: array of shape (n, 1) holding the rainfall label of each sample
    :param K: number of neighbours requested from NearestNeighbors. Neighbour
              index 0 is skipped (normally the sample itself), so K - 1 are usable
    :param Max_add: upper bound on the number of generated samples
    :param threshold: smallest label value that counts as strong rain
    :return: (result_pic, result_label) - X and Y with the generated samples
             appended; X and Y are returned unchanged when nothing can be generated
    :raises ValueError: if K < 2
    '''
    if K < 2:
        raise ValueError('K must be at least 2 so that a neighbour other than the sample itself exists')

    #   Pick out the strong-rain samples.
    strong_rain_index = [idx for idx, row in enumerate(Y) if row[0] >= threshold]
    strong_count = len(strong_rain_index)
    train_size = len(Y)
    print('train_size：',train_size)
    if strong_count < 2:
        # Interpolation needs two distinct samples; this also avoids dividing by zero below.
        print('pure_smote: fewer than 2 strong-rain samples, nothing generated')
        return X, Y

    strong_rain_label = [Y[idx][0] for idx in strong_rain_index]
    #   One flattened row per strong-rain sample.
    strong_rain_pic = np.reshape(X[strong_rain_index], (strong_count, -1))
    print('strong_rain_pic:',strong_rain_pic.shape)

    #   Ratio of non-strong to strong samples.
    append_size = (train_size - strong_count)//strong_count
    append_num = min(strong_count * (append_size-1), Max_add)
    print('append_num:',append_num)
    if append_num <= 0:
        return X, Y

    #   The neighbour search cannot return more neighbours than there are samples.
    k = min(K, strong_count)
    nbrs = NearestNeighbors(n_neighbors=k,algorithm='ball_tree').fit(strong_rain_pic)
    _, indices = nbrs.kneighbors(strong_rain_pic)

    sample_shape = tuple(X.shape[1:])
    new_pics = []           #   generated images, each of shape (1,) + sample_shape
    new_labels = []         #   generated labels, each of shape (1, 1)
    # Exactly append_num samples are produced (the old loop emitted one extra).
    for count in range(append_num):
        pic_index = count % strong_count
        cur_pic = strong_rain_pic[pic_index]
        cur_label = strong_rain_label[pic_index]

        alpha = random.uniform(0,1)
        nb_index = indices[pic_index][random.randint(1, k-1)]
        nb_pic = strong_rain_pic[nb_index]
        nb_label = strong_rain_label[nb_index]

        x_new = cur_pic + alpha * (nb_pic-cur_pic)
        y_new = cur_label + alpha * (nb_label - cur_label)

        new_pics.append(np.reshape(x_new, (1,) + sample_shape))
        new_labels.append(np.reshape(y_new, (1, -1)))

    #   A single concatenation instead of re-stacking the whole array per sample.
    result_pic = np.vstack([X] + new_pics)
    result_label = np.vstack([Y] + new_labels)
    print()
    return  result_pic,result_label

#   Augment the training images of the selected height level with pure_smote
#   (K = 6 neighbours) and report the resulting array shapes.
deal_x,deal_y = pure_smote(train_high0_img,train_rain,K = 6)
print(' deal_x:', deal_x.shape)
print(' deal_y:', deal_y.shape)

####################################################   模型  ######################################################
def positional_encoding(pos, d_model):
    '''
    Build a sinusoidal positional-encoding table.

    The result holds the sine terms (even feature indices) in the first half
    of the last axis and the cosine terms (odd feature indices) in the second
    half; the two are concatenated, not interleaved.

    :param pos: number of positions to encode
    :param d_model: size of the feature (hidden) dimension
    :return: float32 tensor of shape [1, pos, d_model]; the leading axis broadcasts over the batch
    '''
    def get_angles(position, i):
        # i is the feature index; i // 2 maps 2i and 2i+1 onto the same frequency.
        # The result broadcasts to shape [pos, d_model].
        # Built-in float() replaces np.float, which NumPy 1.24 removed.
        return position / np.power(10000., 2. * (i // 2.) / float(d_model))

    angle_rates = get_angles(np.arange(pos)[:, np.newaxis],
                             np.arange(d_model)[np.newaxis, :])
    # sine on even feature indices, cosine on odd feature indices
    pe_sin = np.sin(angle_rates[:, 0::2])
    pe_cos = np.cos(angle_rates[:, 1::2])
    pos_encoding = np.concatenate([pe_sin, pe_cos], axis=-1)
    pos_encoding = tf.cast(pos_encoding[np.newaxis, ...], tf.float32)
    return pos_encoding

'''*************** 第一部分: Scaled dot-product attention ***************'''
def my_mask(inputs):
    '''
    Apply a causal (look-ahead) mask to attention scores.

    Positions where the key index is greater than the query index are replaced
    by a large negative number so that softmax gives them (almost) zero weight.
    The mask is chosen by position, not by value, so a score that happens to
    be exactly 0 in an allowed position is kept.

    :param inputs: scores whose last two axes are (T_q, T_k)
    :return: tensor with the same shape as inputs
    '''
    padding_num = -2 ** 32 + 1.1
    # Ones on and below the diagonal of the last two axes, zeros above it.
    allowed = tf.linalg.band_part(tf.ones(tf.shape(inputs)[-2:]), -1, 0)
    # The (T_q, T_k) condition broadcasts over any leading batch / head axes.
    outputs = tf.where(tf.equal(allowed, 1.), inputs, tf.ones_like(inputs) * padding_num)
    return outputs

def scaled_dot_product_attention(q, k, v, mask = None):
    '''attention(Q, K, V) = softmax(Q * K^T / sqrt(dk)) * V

    :param q: queries, last axis is the per-head depth
    :param k: keys
    :param v: values
    :param mask: any non-None value switches on the causal mask from my_mask;
                 the value itself is not used
    :return: (outputs, attention_weights)
    '''
    # Depth of the query vectors, used as the scaling factor.
    depth = tf.cast(tf.shape(q)[-1], tf.float32)
    # Query-key similarity, scaled by sqrt(depth).
    logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(depth)
    if mask is not None:
        print('有mask')
        logits = my_mask(logits)
    # Softmax over the last axis turns the scores into attention weights.
    attention_weights = tf.nn.softmax(logits)
    # Weighted sum of the values.
    return tf.matmul(attention_weights, v), attention_weights

'''*************** 第二部分: Multi-Head Attention ***************'''
'''
multi-head attention包含3部分： - 线性层与分头 - 缩放点积注意力 - 头连接 - 末尾线性层
每个多头注意块有三个输入; Q（查询），K（密钥），V（值）。 它们通过第一层线性层并分成多个头。
注意:点积注意力时需要使用mask， 多头输出需要使用tf.transpose调整各维度。
Q，K和V不是一个单独的注意头，而是分成多个头，因为它允许模型共同参与来自不同表征空间的不同信息。
在拆分之后，每个头部具有降低的维度，总计算成本与具有全维度的单个头部注意力相同。
'''
class MultiHeadAttention(tf.keras.layers.Layer):
    '''
    Multi-head attention layer.

    q, k and v each go through their own Dense projection, are split into
    `num_heads` heads of size d_model // num_heads, attended with
    scaled_dot_product_attention, merged back and passed through a final
    Dense layer.
    '''
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        # d_model must be divisible by the number of heads
        assert d_model % num_heads == 0
        # feature size of a single head
        self.depth = d_model // num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        '''Reshape [batch, seq_len, d_model] to [batch, num_heads, seq_len, depth].'''
        x = tf.reshape(x, [batch_size, -1, self.num_heads, self.depth])
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, q, k, v, mask = None):
        '''
        :param q: query input, [batch, seq_len_q, features]
        :param k: key input, [batch, seq_len_k, features]
        :param v: value input, [batch, seq_len_k, features]
        :param mask: forwarded to scaled_dot_product_attention
        :return: (output [batch, seq_len_q, d_model],
                  attention_weights [batch, num_heads, seq_len_q, seq_len_k])
        '''
        batch_size = tf.shape(q)[0]
        # Separate projections for query, key and value. All three previously
        # went through self.wq, which left wk and wv unused.
        q = self.wq(q)  # shape=[batch_size, seq_len_q, d_model]
        k = self.wk(k)
        v = self.wv(v)
        # split into heads
        q = self.split_heads(q, batch_size)  # shape=[batch_size, num_heads, seq_len_q, depth]
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)
        # scaled_attention shape=[batch_size, num_heads, seq_len_q, depth]
        # attention_weights shape=[batch_size, num_heads, seq_len_q, seq_len_k]
        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
        # move the head axis behind the sequence axis
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # shape=[batch_size, seq_len_q, num_heads, depth]
        # merge the heads back into one feature axis
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model)) # shape=[batch_size, seq_len_q, d_model]
        # final linear projection
        output = self.dense(concat_attention)
        return output, attention_weights

class LayerNormalization(tf.keras.layers.Layer):
    '''
    Layer normalization over the last axis with a learnable scale (gamma)
    and shift (beta).
    '''
    def __init__(self, epsilon=1e-8, **kwargs):
        super().__init__(**kwargs)
        # Added to the standard deviation to avoid division by zero.
        self.epsilon = epsilon

    def build(self, input_shape):
        feature_shape = input_shape[-1:]
        self.gamma = self.add_weight(name='gamma',
                                     shape=feature_shape,
                                     initializer=tf.ones_initializer(),
                                     trainable=True)
        self.beta = self.add_weight(name='beta',
                                    shape=feature_shape,
                                    initializer=tf.zeros_initializer(),
                                    trainable=True)
        super().build(input_shape)

    def call(self, x): # x shape=[batch_size, seq_len, d_model]
        mean = tf.keras.backend.mean(x, axis=-1, keepdims=True)
        std = tf.keras.backend.std(x, axis=-1, keepdims=True)
        scaled = self.gamma * (x - mean)
        return scaled / (std + self.epsilon) + self.beta

def point_wise_feed_forward(d_model, diff):
    '''
    Two-layer feed-forward network: Dense(diff) with ReLU, then Dense(d_model).

    :param d_model: output feature size
    :param diff: hidden feature size
    :return: a tf.keras.Sequential holding the two Dense layers
    '''
    hidden = tf.keras.layers.Dense(diff, activation=tf.nn.relu)
    projection = tf.keras.layers.Dense(d_model)
    return tf.keras.Sequential([hidden, projection])
'''encoder layer:
每个编码层包含以下子层 - Multi-head attention（带掩码） - Point wise feed forward networks
每个子层中都有残差连接，并最后通过一个正则化层。残差连接有助于避免深度网络中的梯度消失问题。 
每个子层输出是LayerNorm(x + Sublayer(x))，规范化是在d_model维的向量上。Transformer一共有n个编码层。
'''
class EncoderLayer(tf.keras.layers.Layer):
    '''
    One encoder block: self-attention followed by a feed-forward network.
    Each sub-layer output goes through dropout, is added to its input
    (residual connection) and is then layer-normalized.
    '''
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Self-attention: query, key and value are all the layer input.
        attended, _ = self.mha(inputs, inputs, inputs)
        attended = self.dropout1(attended, training=training)
        attention_block = self.layernorm1(inputs + attended)  # shape=[batch_size, seq_len, d_model]
        # Feed-forward sub-layer with its own residual connection.
        transformed = self.ffn(attention_block)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(attention_block + transformed)  # shape=[batch_size, seq_len, d_model]

class Encoder(tf.keras.layers.Layer):
    '''
    Stack of `num_layers` EncoderLayer blocks applied to an already-embedded
    sequence. The input is scaled by sqrt(d_model) and the positional
    encoding is added before the stack. `self.indata` is created but not
    used by call().
    '''
    def __init__(self, d_model, num_layers, num_heads, dff,
               max_seq_len, dropout_rate=0.1):
        super().__init__()
        self.indata = tf.keras.layers.Dense(d_model)
        self.num_layers = num_layers
        self.d_model = d_model
        self.pos_encoding = positional_encoding(max_seq_len, d_model)  # shape=[1, max_seq_len, d_model]
        self.encoder_layer = [EncoderLayer(d_model, num_heads, dff, dropout_rate)
                              for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Actual sequence length of this input; selects the matching
        # slice of the precomputed positional table.
        seq_len = inputs.shape[1]
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        scaled_inputs = inputs * scale
        positioned = scaled_inputs + self.pos_encoding[:, :seq_len, :]
        x = self.dropout(positioned, training=training)
        for layer_index in range(self.num_layers):
            x = self.encoder_layer[layer_index](x, training)
        return x  # shape=[batch_size, seq_len, d_model]

class DecoderLayer(tf.keras.layers.Layer):
    '''
    One decoder block: masked self-attention, attention over the encoder
    output, then a feed-forward network. Each sub-layer is followed by
    dropout, a residual connection and layer normalization.
    '''
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.layernorm3 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout3 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        # Masked self-attention: Q = K = V = decoder input.
        self_att, att_weight1 = self.mha1(inputs, inputs, inputs,mask = True)
        self_att = self.dropout1(self_att, training=training)
        self_att = self.layernorm1(inputs + self_att)

        # Attention over the encoder: Q = self_att, K = V = encoder_out.
        cross_att, att_weight2 = self.mha2(self_att, encoder_out, encoder_out)
        cross_att = self.dropout2(cross_att, training=training)
        cross_att = self.layernorm2(self_att + cross_att)

        # Feed-forward sub-layer.
        ffn_out = self.ffn(cross_att)
        ffn_out = self.dropout3(ffn_out, training=training)
        output = self.layernorm3(cross_att + ffn_out)
        return output, att_weight1, att_weight2

class Decoder(tf.keras.layers.Layer):
    '''
    Stack of `num_layers` DecoderLayer blocks. The input is projected to
    d_model by a Dense layer; the positional encoding is built in __init__
    but is not added in call().
    '''
    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
        super().__init__()
        self.seq_len = tf.shape
        self.indata = tf.keras.layers.Dense(d_model)
        self.d_model = d_model
        self.num_layers = num_layers
        self.pos_encoding = positional_encoding(max_seq_len, d_model)
        self.decoder_layers = [DecoderLayer(d_model, num_heads, dff, dropout_rate)
                               for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        # Not used while the positional encoding stays disabled.
        seq_len = inputs.shape[1]
        attention_weights = {}
        # Project the raw decoder input to d_model features.
        projected = self.indata(inputs)
        x = self.dropout(projected, training=training)
        for layer_number in range(1, self.num_layers + 1):
            layer = self.decoder_layers[layer_number - 1]
            x, att1, att2 = layer(x, encoder_out, training)
            # Keep both attention maps of every layer, keyed by 1-based layer number.
            attention_weights['decoder_layer{}_att_w1'.format(layer_number)] = att1
            attention_weights['decoder_layer{}_att_w2'.format(layer_number)] = att2
        return x, attention_weights

def deinput_padding(seq_len,dim,batch_size):
    '''
    Build a placeholder decoder input: all zeros except the first position
    of every sequence, which is filled with ones.

    :param seq_len: sequence length
    :param dim: feature size of each position
    :param batch_size: number of sequences
    :return: float64 array of shape (batch_size, seq_len, dim)
    '''
    padded = np.zeros((batch_size, seq_len, dim))
    # The first time step of every sequence acts as the start marker.
    padded[:, 0, :] = 1
    return padded

#   Hyperparameters
learn_rate = 2e-4       #   learning rate handed to the Adam optimizer
# learn_rate = 2.0e-4
epochs =5000            #   only referenced by the commented-out fit() call in this file
bat = 10                #   batch size of the tf.data pipelines

#   Network sizes
base_dim = 8                    #   base channel count of the CNN; multiplied by time_list entries
mid_dim = 10                    #   NOTE(review): no use visible in this file
dense_dim = 100                 #   width of the Dense layers that follow the CNN
time_list = [1,2,4,8,16,32]     #   channel multiplier of each CNN stage

unit = 40                       #   NOTE(review): no use visible in this file
def c_b(chanel,kernel_size,stride = 1 ,padding ='valid'):
    '''
    Conv2D -> BatchNormalization -> LeakyReLU building block.

    :param chanel: number of convolution filters
    :param kernel_size: convolution kernel size
    :param stride: convolution stride
    :param padding: convolution padding mode
    :return: a keras.Sequential holding the three layers
    '''
    kernel_init = tf.keras.initializers.TruncatedNormal(stddev=0.02)
    conv = tf.keras.layers.Conv2D(chanel, kernel_size=kernel_size, strides=stride, padding=padding,
                                  kernel_initializer=kernel_init)
    norm = tf.keras.layers.BatchNormalization()
    activation = keras.layers.LeakyReLU()
    return keras.Sequential([conv, norm, activation])
class Transformer(tf.keras.Model):
    '''
    CNN + Transformer-encoder regressor for radar image sequences.

    Every frame of a sequence goes through the same CNN and Dense stack
    (wrapped in TimeDistributed), the per-frame features are fed to the
    Encoder, and two Dense layers reduce the encoder output to one value
    per sequence. The decoder path is disabled.

    :param d_model: feature size of the encoder
    :param num_layers: number of encoder layers
    :param num_heads: number of attention heads
    :param dff: hidden size of the encoder feed-forward network
    :param max_seq_len: length of the positional-encoding table
    :param dropout_rate: dropout rate used inside the encoder
    '''
    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
        super(Transformer, self).__init__()
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()

        # One stage per entry of time_list: two stride-1 blocks followed by a
        # stride-2 block, all with base_dim * multiplier filters.
        cnn_layers = []
        for multiplier in time_list:
            channels = base_dim * multiplier
            cnn_layers.append(c_b(channels, [3, 3], stride=1, padding='SAME'))
            cnn_layers.append(c_b(channels, [3, 3], stride=1, padding='SAME'))
            cnn_layers.append(c_b(channels, [3, 3], stride=2, padding='SAME'))
        cnn_layers.append(tf.keras.layers.GlobalAveragePooling2D())
        self.mycnn = tf.keras.Sequential(cnn_layers)

        self.myDense1 = tf.keras.Sequential([
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),

            tf.keras.layers.Dense(d_model)
        ])
        self.encoder = Encoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
        self.emb = tf.keras.layers.Dense(d_model)
        # self.decoder = Decoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
        self.dim_dense = tf.keras.layers.Dense(1)
        self.final_layer = tf.keras.layers.Dense(1)
        self.flat = tf.keras.layers.Flatten()
        # Build the per-frame wrappers once here instead of creating new
        # TimeDistributed layers on every call().
        self.time_cnn = tf.keras.layers.TimeDistributed(self.mycnn)
        self.time_dense = tf.keras.layers.TimeDistributed(self.myDense1)

    def call(self, inputs, training=None):
        '''
        :param inputs: image sequences that reshape to (-1, 15, 101, 101, 1)
        :param training: Keras training flag, forwarded to the sub-layers
        :return: tensor with one value per sequence
        '''
        inputs = tf.cast(inputs, dtype=tf.float32)
        # Append a channel axis so that each frame is a one-channel image.
        inputs = tf.reshape(inputs, (-1, 15, 101, 101, 1))

        # Per-frame CNN features, then the per-frame Dense stack.
        inputs = self.time_cnn(inputs, training=training)
        inputs = self.time_dense(inputs, training=training)

        inputs = self.layernorm1(inputs)
        inputs = self.emb(inputs)
        print('trains_inputs:',inputs)
        inputs = self.layernorm2(inputs)
        print('layerhoutrains_inputs:', inputs)
        # Encoder output shape=[batch_size, seq_len_input, d_model]
        encoder_output = self.encoder(inputs, training=training)
        # Reduce every time step to a single value, then flatten over time.
        encoder_output = self.dim_dense(encoder_output)
        print('encoder_output:',encoder_output.shape)
        encoder_output = self.flat(encoder_output)
        print('encoder_output:', encoder_output.shape)
        final_out = self.final_layer(encoder_output)
        return final_out

#   Input pipelines: SMOTE-augmented data for training, the held-out split for validation.
train_db = tf.data.Dataset.from_tensor_slices((deal_x, deal_y)).shuffle(500).batch(bat)
# train_db = tf.data.Dataset.from_tensor_slices((train_high0_img, train_rain)).shuffle(500).repeat()
test_db = tf.data.Dataset.from_tensor_slices((test_high0_img, test_rain)).batch(bat)

#   `learning_rate` is the supported keyword; the `lr` alias is deprecated.
opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1)
early_stoping = EarlyStopping(monitor='val_loss',patience=100)
my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
# ###################   load a saved model    ####################################
# model_name = './my_save_model/trans_model_LOSS1/transmodel_LOSS1.ckpt'
# my_model.load_weights(model_name)
# ###################   load a saved model    ####################################
my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
# my_model.fit(train_db,validation_data=test_db,epochs=epochs, validation_freq=1,callbacks=[early_stoping])
#   Keras documents `callbacks` as a list of callback instances.
my_model.fit(train_db,validation_data=test_db,epochs=200, validation_freq=1,callbacks=[early_stoping])
my_model.evaluate(test_db)

#   Save the trained weights
model_name = './my_save_model/pure_smote_model_5/my_model_5.ckpt'
# model_name = 'my_model1.ckpt'
my_model.save_weights(model_name)
print('保存完成')
del (my_model)
#   Rebuild the model and load the saved weights back
my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
my_model.load_weights(model_name)
#   A fresh optimizer, so no optimizer state of the deleted model is carried over.
my_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1),
                 loss=tf.keras.losses.MSE)
print('加载完成')
# 相关阅读 (related reading; unrelated links carried over from the source web page):
#   PHP 语法
#   PHP 安装
#   06_传智播客iOS视频教程_方法的本质是SEL消息
#   05_传智播客iOS视频教程_类对象的使用
#   04_传智播客iOS视频教程_类是以Class对象存储在代码段
#   03_传智播客iOS视频教程_作业讲解及结构体与类的区别
#   02_传智播客iOS视频教程_子类在内存中的存储和方法调用过程
#   01_传智播客iOS视频教程_课程介绍与知识点回顾
#   Day01-Objective-C语法基础-video 01_传智播客iOS视频教程_OC的简要历史
#   17_关于上下文的说明
# 原文地址 (original article): https://www.cnblogs.com/cxhzy/p/14713982.html