• 命名实体识别之 BERT+BiLSTM(基于 TensorFlow)


    接下来我们继续对官方基于 BERT 的模型进行扩展:在 BERT 的序列输出之后接入一层 BiLSTM 和全连接投影层。此前的相关文章可参考:

    基于bert命名实体识别(一)数据处理 

    命名实体识别数据预处理 

    命名实体识别之创建训练数据 

    命名实体识别之使用tensorflow的bert模型进行微调 

    命名实体识别之动态融合不同bert层的特征(基于tensorflow)

    直接看代码:

    class MyModel:
        """BERT + BiLSTM token tagger built as a TensorFlow 1.x static graph.

        Constructing an instance creates the input placeholders and then
        immediately builds the full graph (BERT -> dropout -> BiLSTM ->
        projection -> softmax cross-entropy loss) via ``bert_embed``.
        """

        def __init__(self, config):
            """Create the placeholders and build the graph.

            :param config: object exposing ``lstm_dim``, ``relation_num``,
                ``bert_config_file`` and ``bert_file``.
            """
            self.config = config
            # Placeholders fed to the model.
            self.input_x_word = tf.placeholder(tf.int32, [None, None], name="input_x_word")
            self.input_x_len = tf.placeholder(tf.int32, name='input_x_len')
            self.input_mask = tf.placeholder(tf.int32, [None, None], name='input_mask')
            # Gold NER tag ids for every token.
            self.input_relation = tf.placeholder(tf.int32, [None, None], name='input_relation')
            # Keep probability of the dropout applied to the BERT output; callers
            # must feed it (e.g. 0.9 while training, 1.0 for evaluation).
            self.keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
            self.is_training = tf.placeholder(tf.bool, None, name='is_training')
            self.initializer = initializers.xavier_initializer()
            self.lstm_dim = self.config.lstm_dim
            self.relation_num = self.config.relation_num
            # Dynamic size of the last axis of the token-id input.
            self.num_steps = tf.shape(self.input_x_word)[-1]
            print("self.num_steps.shape[-1]:", self.num_steps)
            self.bert_embed(bert_init=True)

        def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, name=None):
            """Run a bidirectional LSTM over the input sequence.

            :param lstm_inputs: [batch_size, num_steps, emb_size]
            :param lstm_dim: number of units of each directional cell
            :param lengths: per-example sequence lengths passed to the RNN
            :param name: optional name scope (defaults to "char_BiLSTM")
            :return: [batch_size, num_steps, 2*lstm_dim]
            """
            with tf.name_scope("char_BiLSTM" if not name else name):
                lstm_cell = {}
                for direction in ["forward", "backward"]:
                    with tf.name_scope(direction):
                        lstm_cell[direction] = rnn.CoupledInputForgetGateLSTMCell(
                            lstm_dim,
                            use_peepholes=True,
                            initializer=self.initializer,
                            state_is_tuple=True)
                # Only the per-step outputs are needed; final states are dropped.
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    lstm_cell["forward"],
                    lstm_cell["backward"],
                    lstm_inputs,
                    dtype=tf.float32,
                    sequence_length=lengths)
            # Concatenate forward and backward outputs on the feature axis.
            return tf.concat(outputs, axis=2)

        def project_layer(self, lstm_outputs, name=None):
            """Hidden layer between the LSTM layer and the tag logits.

            :param lstm_outputs: [batch_size, num_steps, 2*lstm_dim]
            :param name: optional name scope (defaults to "project")
            :return: [batch_size, num_steps, num_tags]
            """
            # NOTE(review): tf.name_scope does not prefix tf.get_variable names,
            # so HW/Hb/LW/Lb live in the enclosing variable scope. This is kept
            # as-is so existing checkpoints and tensor names stay valid.
            with tf.name_scope("project" if not name else name):
                with tf.name_scope("hidden"):
                    W = tf.get_variable("HW", shape=[self.lstm_dim * 2, self.lstm_dim],
                                        dtype=tf.float32, initializer=self.initializer)
                    b = tf.get_variable("Hb", shape=[self.lstm_dim], dtype=tf.float32,
                                        initializer=tf.zeros_initializer())
                    # Flatten batch and time so one matmul handles every token.
                    output = tf.reshape(lstm_outputs, shape=[-1, self.lstm_dim * 2])
                    hidden = tf.tanh(tf.nn.xw_plus_b(output, W, b))

                # Project to the score of each tag.
                with tf.name_scope("logits"):
                    W = tf.get_variable("LW", shape=[self.lstm_dim, self.relation_num],
                                        dtype=tf.float32, initializer=self.initializer)
                    b = tf.get_variable("Lb", shape=[self.relation_num], dtype=tf.float32,
                                        initializer=tf.zeros_initializer())
                    pred = tf.nn.xw_plus_b(hidden, W, b)

                return tf.reshape(pred, [-1, self.num_steps, self.relation_num], name='pred_logits')

        def loss_without_crf(self, output_layer, num_labels, bert_init=True):
            """Attach softmax cross-entropy loss and optionally load BERT weights.

            Sets ``self.logits``, ``self.probabilities``, ``self.predictions``,
            ``self.per_example_loss`` and ``self.loss``.

            :param output_layer: tag logits, [batch_size, num_steps, num_labels]
            :param num_labels: number of tag classes (one-hot depth)
            :param bert_init: when True, initialise matching trainable variables
                from ``config.bert_file``; when False the checkpoint is not read.
            """
            with tf.variable_scope("loss"):
                self.logits = output_layer
                self.probabilities = tf.nn.softmax(self.logits, axis=-1)
                log_probs = tf.nn.log_softmax(self.logits, axis=-1)  # [batch, num_steps, num_labels]
                print("log_probs.shape:", log_probs.shape)

                self.predictions = tf.argmax(self.logits, axis=-1, name="predictions")

                # [batch, num_steps, num_labels]
                one_hot_labels = tf.one_hot(self.input_relation, depth=num_labels, dtype=tf.float32)
                self.per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
                # NOTE(review): the mean runs over every position, padding
                # included; confirm whether padded tokens should be masked out.
                self.loss = tf.reduce_mean(self.per_example_loss)
                print(self.loss)

                tvars = tf.trainable_variables()
                init_checkpoint = self.config.bert_file
                initialized_variable_names = {}
                if bert_init:
                    # Only touch the checkpoint when initialisation is requested.
                    assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint(
                        tvars, init_checkpoint)
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                tf.logging.info("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    print("  name = {}, shape = {}{}".format(var.name, var.shape, init_string))
                if bert_init:
                    print('init bert from checkpoint: {}'.format(init_checkpoint))

        def bert_embed(self, bert_init=True):
            """Build the BERT encoder and stack BiLSTM, projection and loss on it.

            :param bert_init: forwarded to ``loss_without_crf``; controls whether
                BERT weights are initialised from ``config.bert_file``.
            :return: None (the process currently exits at the end, see below)
            """
            num_labels = self.config.relation_num
            bert_config = BertConfig.from_json_file(self.config.bert_config_file)

            model = BertModel(
                config=bert_config,
                is_training=self.is_training,  # fine-tuning
                input_ids=self.input_x_word,
                input_mask=self.input_mask,
                token_type_ids=None,
                use_one_hot_embeddings=False)

            # Sequence length = count of non-zero token ids in each row
            # (assumes id 0 is the padding id -- TODO confirm against the vocab).
            used = tf.sign(tf.abs(self.input_x_word))
            length = tf.reduce_sum(used, axis=1)
            self.lengths = tf.cast(length, tf.int32)

            # Token-level output (not the pooled [CLS] vector), since every
            # token needs its own tag.
            output_layer = model.get_sequence_output()
            # Use the fed keep probability so dropout can be disabled at inference.
            lstm_inputs = tf.nn.dropout(output_layer, keep_prob=self.keep_prob)
            output_layer = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)
            output_layer = self.project_layer(output_layer)
            print("output_layer.shape:", output_layer.shape)
            self.loss_without_crf(output_layer, num_labels, bert_init=bert_init)
            # NOTE: deliberate debugging stop -- the process exits right after the
            # graph is built so only the construction output is shown. Remove
            # these two lines to actually train or evaluate the model.
            import sys
            sys.exit(0)

    运行结果(模型构图阶段的输出;由于 bert_embed 末尾调用了 sys.exit(0),程序在构图完成后即退出):

    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/optimization.py:155: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.
    
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/bert/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.
    
    {'', '', '', '', 'Q', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Z', 'U', '', 'X', '', '', 'F', 'L', '', '', '', '', '', '', '', '', '', '', '', '', 'I', '', '', '', '', '', '', 'T', '', '', '', '', '', 'G', '', 'O', 'V', '', '', '', '`', '', '', '', '', '', 'W', 'S', 'N', '', '', '', '', 'K', '', '', '', '', 'Y', 'R', '', '', '', '', '', '', '', '', 'D', '', '', '', '', '', '', '', 'E', '', '', 'B', 'J', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'ue40a', '', '', '', '', '', '', '', '', 'C', '', '', '', '', '', '', '', '', '', '', 'P', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'M', 'A', 'H', '', '', '', '', ''}
    8012
    {'', 'C', 'T', '', 'Q', 'D', 'G', '', '', 'P', '', '', '', 'E', 'O', 'V', '', 'Z', 'J', 'B', 'U', 'X', '', 'F', 'L', '', 'W', '', 'N', 'S', 'K', '', '', 'A', 'H', 'Y', 'M', 'R', 'I'}
    1105
    WARNING:tensorflow:From test_bert.py:388: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
    
    2020-12-13 14:07:47.209770: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
    2020-12-13 14:07:47.265991: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:47.266613: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties: 
    name: Tesla T4 major: 7 minor: 5 memoryClockRate(GHz): 1.59
    pciBusID: 0000:00:04.0
    2020-12-13 14:07:47.266923: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
    2020-12-13 14:07:47.493085: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
    2020-12-13 14:07:47.621614: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
    2020-12-13 14:07:47.641392: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
    2020-12-13 14:07:47.925153: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
    2020-12-13 14:07:47.943921: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
    2020-12-13 14:07:48.468415: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
    2020-12-13 14:07:48.468625: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.469411: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.470004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
    2020-12-13 14:07:48.525931: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
    2020-12-13 14:07:48.526210: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x270ef40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
    2020-12-13 14:07:48.526244: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
    2020-12-13 14:07:48.677879: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.678754: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x270f100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
    2020-12-13 14:07:48.678790: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
    2020-12-13 14:07:48.679588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.680198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties: 
    name: Tesla T4 major: 7 minor: 5 memoryClockRate(GHz): 1.59
    pciBusID: 0000:00:04.0
    2020-12-13 14:07:48.680265: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
    2020-12-13 14:07:48.680295: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
    2020-12-13 14:07:48.680319: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
    2020-12-13 14:07:48.680346: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
    2020-12-13 14:07:48.680371: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
    2020-12-13 14:07:48.680393: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
    2020-12-13 14:07:48.680416: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
    2020-12-13 14:07:48.680497: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.681158: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.681699: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
    2020-12-13 14:07:48.684658: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
    2020-12-13 14:07:48.686073: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
    2020-12-13 14:07:48.686103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186]      0 
    2020-12-13 14:07:48.686114: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0:   N 
    2020-12-13 14:07:48.687110: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.687768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
    2020-12-13 14:07:48.688359: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
    2020-12-13 14:07:48.688404: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14221 MB memory) -> physical GPU (device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5)
    WARNING:tensorflow:From test_bert.py:176: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
    
    self.num_steps.shape[-1]: Tensor("strided_slice_1:0", shape=(), dtype=int32)
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:175: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
    
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:416: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
    
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:497: The name tf.assert_less_equal is deprecated. Please use tf.compat.v1.assert_less_equal instead.
    
    WARNING:tensorflow:
    The TensorFlow contrib module will not be included in TensorFlow 2.0.
    For more information, please see:
      * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
      * https://github.com/tensorflow/addons
      * https://github.com/tensorflow/io (for I/O related ops)
    If you depend on functionality not listed there, please file an issue.
    
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:364: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
    Instructions for updating:
    Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:874: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
    Instructions for updating:
    Use keras.layers.Dense instead.
    WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
    Instructions for updating:
    Please use `layer.__call__` method instead.
    WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:282: The name tf.erf is deprecated. Please use tf.math.erf instead.
    
    WARNING:tensorflow:From test_bert.py:209: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
    Instructions for updating:
    Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
    WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
    Instructions for updating:
    Please use `keras.layers.RNN(cell)`, which is equivalent to this API
    WARNING:tensorflow:Entity <bound method CoupledInputForgetGateLSTMCell.call of <tf_utils.rnncell.CoupledInputForgetGateLSTMCell object at 0x7f297e2d2eb8>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
    WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/rnn.py:244: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
    Instructions for updating:
    Use tf.where in 2.0, which has the same broadcast rule as np.where
    WARNING:tensorflow:Entity <bound method CoupledInputForgetGateLSTMCell.call of <tf_utils.rnncell.CoupledInputForgetGateLSTMCell object at 0x7f297e2d2fd0>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
    WARNING:tensorflow:From test_bert.py:225: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.
    
    output_layer.shape: (?, ?, 11)
    log_probs.shape: (?, ?, 11)
    self.per_example_loss.shape: (?, ?)
    self.loss.shape: ()
    WARNING:tensorflow:From test_bert.py:255: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.
    
    WARNING:tensorflow:From test_bert.py:259: The name tf.train.init_from_checkpoint is deprecated. Please use tf.compat.v1.train.init_from_checkpoint instead.
    
    WARNING:tensorflow:From test_bert.py:260: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
      name = bert/embeddings/word_embeddings:0, shape = (21128, 768), *INIT_FROM_CKPT*
      name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
      name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
      name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_1/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_3/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_4/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_5/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_7/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_8/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
      name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
      name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xi:0, shape = (768, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_hi:0, shape = (256, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_ci:0, shape = (256, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xo:0, shape = (768, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_ho:0, shape = (256, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_co:0, shape = (256, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xc:0, shape = (768, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_hc:0, shape = (256, 256)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_i:0, shape = (256,)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_c:0, shape = (256,)
      name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_o:0, shape = (256,)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xi:0, shape = (768, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_hi:0, shape = (256, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_ci:0, shape = (256, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xo:0, shape = (768, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_ho:0, shape = (256, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_co:0, shape = (256, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xc:0, shape = (768, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_hc:0, shape = (256, 256)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_i:0, shape = (256,)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_c:0, shape = (256,)
      name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_o:0, shape = (256,)
      name = HW:0, shape = (512, 256)
      name = Hb:0, shape = (256,)
      name = LW:0, shape = (256, 11)
      name = Lb:0, shape = (11,)
    init bert from checkpoint: /content/drive/MyDrive/Deep-Learning-With-Python/chapter8/CCF_ner/bert_pretrained/bert_model.ckpt
    WARNING:tensorflow:From test_bert.py:392: The name tf.train.exponential_decay is deprecated. Please use tf.compat.v1.train.exponential_decay instead.
    
    bert train variable num: 199
    normal train variable num: 26
    word2vec trainable!!

    说明:

    我们可以直接调用官方tensorflow版的bert模型来使用bert。接下来,我们使用output_layer = model.get_sequence_output()来获得最后一层的特征,然后再添加bilstm层,

    对于bilstm的前向和反向的输出进行拼接后,经过一个project_layer()函数计算logits,最后再经过一个损失层计算损失和其它的一些预测的值等。同时我们要将预训练bert模型的参数导入到bert中。

    这里面我们可以通过这种方式计算每个序列的长度:

    used = tf.sign(tf.abs(self.input_x_word))
    length = tf.reduce_sum(used, reduction_indices=1)
    self.lengths = tf.cast(length, tf.int32)

    当然,在喂入数据的时候,我们也已经传入了长度了,可以酌情使用。

    在bert之后接上bilstm时,一般而言bert微调的学习率和bilstm的学习率要设置成不同的值,比如以下代码:

    # Hyper-parameter / optimizer setup.
    # Builds two separate update ops -- one for the variables whose name
    # contains 'bert' and one for all remaining trainable variables -- so
    # that each group can be trained with its own learning rate, then
    # combines them into a single `train_op`.
                # Step counter passed to the Adam optimizer's minimize() below.
                global_step = tf.Variable(0, name='step', trainable=False)
                # Learning rate for the non-BERT layers, decayed in discrete
                # steps (staircase=True) from config.learning_rate.
                learning_rate = tf.train.exponential_decay(config.learning_rate, global_step, config.decay_step,
                                                           config.decay_rate, staircase=True)
    
                normal_optimizer = tf.train.AdamOptimizer(learning_rate)  # optimizer (and learning rate) for the layers stacked on top of BERT
    
                all_variables = graph.get_collection('trainable_variables')
                # Split the trainable variables by name: anything containing
                # 'bert' goes to the BERT group, everything else to the other.
                word2vec_var_list = [x for x in all_variables if 'bert' in x.name]  # BERT parameters (despite the "word2vec" name)
                normal_var_list = [x for x in all_variables if 'bert' not in x.name]  # parameters of the layers stacked on top of BERT
                print('bert train variable num: {}'.format(len(word2vec_var_list)))
                print('normal train variable num: {}'.format(len(normal_var_list)))
                # Update op that only touches the non-BERT variables.
                normal_op = normal_optimizer.minimize(model.loss, global_step=global_step, var_list=normal_var_list)
                # Total number of training steps over all epochs; passed to
                # create_optimizer as num_train_steps.
                num_batch = int(train_iter.num_records / config.batch_size * config.train_epoch)
                # Default value; rebound by the create_optimizer() return value
                # when BERT variables exist.
                # NOTE(review): reuses the name 'step' already given to
                # global_step above -- confirm this is intended.
                embed_step = tf.Variable(0, name='step', trainable=False)
                if word2vec_var_list:  # fine-tune BERT
                    print('word2vec trainable!!')
                    # create_optimizer is defined elsewhere (presumably adapted
                    # from BERT's optimization.py -- TODO confirm). It receives
                    # config.embed_learning_rate as the BERT learning rate and a
                    # warm-up length of 5% of the total steps.
                    word2vec_op, embed_learning_rate, embed_step = create_optimizer(
                        model.loss, config.embed_learning_rate, num_train_steps=num_batch,
                        num_warmup_steps=int(num_batch * 0.05) , use_tpu=False ,  variable_list=word2vec_var_list
                    )
    
                    train_op = tf.group(normal_op, word2vec_op)  # run the BERT update and the non-BERT update together
                else:
                    # No BERT variables are trainable: only the non-BERT update runs.
                    train_op = normal_op

    一般bert+bilstm之后还需要接一个crf(条件随机场),我们下节继续。

  • 相关阅读:
    法师 南邮NOJ网络赛2083
    法师 南邮NOJ网络赛2083
    法师 南邮NOJ网络赛2083
    水NOJ Duplicate Removal
    水NOJ Duplicate Removal
    水NOJ Duplicate Removal
    水NOJ Duplicate Removal
    【POJ】3630 Phone List
    【POJ】2503 Babelfish
    【POJ】1451 T9
  • 原文地址:https://www.cnblogs.com/xiximayou/p/14130738.html
Copyright © 2020-2023  润新知