Below is the code for the Encoder class:
class Encoder(tf.keras.layers.Layer):
    """
    Transformer encoder stack.

    The input token ids are passed through an embedding layer, scaled,
    combined with the positional encoding, regularised with dropout and then
    passed through a stack of `num_layers` EncoderLayer instances.
    """

    def __init__(self, num_layers, embedding_dim, num_heads, fully_connected_dim, input_vocab_size,
                 maximum_position_encoding, dropout_rate=0.1, layernorm_eps=1e-6):
        """
        Build the embedding, positional encoding, encoder layers and dropout.

        Arguments:
            num_layers (int): Number of EncoderLayer instances in the stack
            embedding_dim (int): Size of the embedding vectors
            num_heads (int): Forwarded to each EncoderLayer
            fully_connected_dim (int): Forwarded to each EncoderLayer
            input_vocab_size (int): Vocabulary size of the embedding layer
            maximum_position_encoding (int): Forwarded to positional_encoding
            dropout_rate (float): Rate of the dropout applied after the
                positional encoding; also forwarded to each EncoderLayer
            layernorm_eps (float): Forwarded to each EncoderLayer
        """
        super().__init__()

        self.embedding_dim = embedding_dim
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(input_vocab_size, self.embedding_dim)
        # Indexed below as [:, :seq_len, :], so presumably of shape
        # (1, maximum_position_encoding, embedding_dim) -- confirm against
        # positional_encoding.
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.embedding_dim)

        self.enc_layers = [EncoderLayer(embedding_dim=self.embedding_dim,
                                        num_heads=num_heads,
                                        fully_connected_dim=fully_connected_dim,
                                        dropout_rate=dropout_rate,
                                        layernorm_eps=layernorm_eps)
                           for _ in range(self.num_layers)]

        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x, training, mask):
        """
        Forward pass for the Encoder

        Arguments:
            x (tf.Tensor): Tensor of integer token ids of shape
                (batch_size, seq_len). It has NOT been embedded yet; the
                embedding_dim axis is only added by the embedding layer below.
            training (bool): Boolean, set to true to activate
                the training mode for dropout layers
            mask (tf.Tensor): Boolean mask to ensure that the padding is not
                treated as part of the input (forwarded unchanged to every
                encoder layer)
        Returns:
            x (tf.Tensor): Tensor of shape (batch_size, seq_len, embedding_dim)
        """
        # Must be read before embedding: x is still (batch_size, seq_len) here.
        seq_len = tf.shape(x)[1]

        # Pass input through the Embedding layer
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)
        # Scale embedding by multiplying it by the square root of the embedding dimension
        x *= tf.math.sqrt(tf.cast(self.embedding_dim, tf.float32))
        # Add the position encoding to embedding, truncated to the actual sequence length
        x += self.pos_encoding[:, :seq_len, :]
        # Pass the encoded embedding through a dropout layer
        x = self.dropout(x, training=training)

        # Pass the output through the stack of encoding layers, in order
        for enc_layer in self.enc_layers:
            x = enc_layer(x, training, mask)

        return x  # (batch_size, seq_len, embedding_dim)
Regarding the docstring of the call method, under Arguments:
x (tf.Tensor): Tensor of shape (batch_size, seq_len, embedding_dim)
Why does the input x to the call method have embedding_dim as its last dimension? It has not passed through the embedding layer yet, so shouldn't its shape be (batch_size, seq_len)?
Thank you.
DS