x = self.embedding(x)  # (batch_size, target_seq_len, fully_connected_dim)

# scale embeddings by multiplying by the square root of their dimension
x *= tf.math.sqrt(tf.cast(self.embedding_dim, tf.float32))

# calculate positional encodings and add them to the word embeddings
x += self.pos_encoding[:, :seq_len, :]

# apply a dropout layer to x
x = self.dropout(x, training=training)

# pass x and the encoder output through the stack of decoder layers,
# saving the attention weights of block 1 (self-attention) and
# block 2 (decoder-encoder attention)
for i in range(self.num_layers):
    x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                           look_ahead_mask, padding_mask)

    # update the attention_weights dictionary with both blocks' weights
    attention_weights['decoder_layer{}_block1_self_att'.format(i + 1)] = block1
    attention_weights['decoder_layer{}_block2_decenc_att'.format(i + 1)] = block2
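For context, `seq_len` and `attention_weights` are defined earlier in the method; the surrounding `call` scaffold (sketching from memory here, so details may differ slightly from the notebook) looks roughly like this:

def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    seq_len = tf.shape(x)[1]   # target sequence length
    attention_weights = {}     # filled inside the decoder-layer loop

    # ... the lines above go here ...

    # x.shape == (batch_size, target_seq_len, fully_connected_dim)
    return x, attention_weights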
Can someone tell me what's wrong with my code? This is the error I am getting:

Wrong values in outd when training=True and use padding mask
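In case it helps to reproduce this, here is the minimal sanity check I would run locally. This is only a sketch: the constructor arguments and the mask conventions below are placeholders in the style of the TensorFlow Transformer tutorial, not necessarily the values the grader uses.

import numpy as np
import tensorflow as tf

tf.random.set_seed(0)

# Placeholder hyperparameters -- the grader uses its own values.
decoder = Decoder(num_layers=2, embedding_dim=12, num_heads=2,
                  fully_connected_dim=16, target_vocab_size=50,
                  maximum_position_encoding=20)

x = tf.constant([[3, 7, 1, 0, 0]])          # (1, 5) token ids, 0 = padding
enc_output = tf.random.uniform((1, 5, 12))  # stand-in for the encoder output

# TF-tutorial-style masks: 1 marks positions to mask out.
look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((5, 5)), -1, 0)
padding_mask = tf.cast(tf.equal(x, 0), tf.float32)[:, tf.newaxis, tf.newaxis, :]

out_train, _ = decoder(x, enc_output, True, look_ahead_mask, padding_mask)
out_eval1, _ = decoder(x, enc_output, False, look_ahead_mask, padding_mask)
out_eval2, _ = decoder(x, enc_output, False, look_ahead_mask, padding_mask)

# With training=False dropout is inactive, so repeated calls match exactly;
# with training=True dropout is active, so the output should differ.
print(np.allclose(out_eval1, out_eval2))  # expect True
print(np.allclose(out_train, out_eval1))  # usually False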