No, it does not. Here is my corrected code:
# UNQ_C4 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION EncoderLayer
class EncoderLayer(tf.keras.layers.Layer):
    """
    The encoder layer is composed by a multi-head self-attention mechanism,
    followed by a simple, positionwise fully connected feed-forward network.
    This architecture includes a residual connection around each of the two
    sub-layers, followed by layer normalization.
    """
    def __init__(self, embedding_dim, num_heads, fully_connected_dim,
                 dropout_rate=0.1, layernorm_eps=1e-6):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(num_heads=num_heads,
                                      key_dim=embedding_dim,
                                      dropout=dropout_rate)

        self.ffn = FullyConnected(embedding_dim=embedding_dim,
                                  fully_connected_dim=fully_connected_dim)

        self.layernorm1 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm2 = LayerNormalization(epsilon=layernorm_eps)

        self.dropout_ffn = Dropout(dropout_rate)

    def call(self, x, training, mask=None):
        """
        Forward pass for the Encoder Layer

        Arguments:
            x -- Tensor of shape (batch_size, input_seq_len, embedding_dim)
            training -- Boolean, set to true to activate
                        the training mode for dropout layers
            mask -- Boolean mask to ensure that the padding is not
                    treated as part of the input
        Returns:
            encoder_layer_out -- Tensor of shape (batch_size, input_seq_len, embedding_dim)
        """
        # START CODE HERE
        # calculate self-attention using mha (~1 line).
        # Dropout is added by Keras automatically if the dropout parameter is non-zero during training
        self_mha_output = self.mha(x, x, x, mask)  # Self attention (batch_size, input_seq_len, embedding_dim)

        # skip connection
        # apply layer normalization on sum of the input and the attention output to get the
        # output of the multi-head attention layer (~1 line)
        skip_x_attention = self.layernorm1(x + self_mha_output)  # (batch_size, input_seq_len, embedding_dim)

        # pass the output of the multi-head attention layer through a ffn (~1 line)
        ffn_output = self.ffn(skip_x_attention)  # (batch_size, input_seq_len, embedding_dim)

        # apply dropout layer to ffn output during training (~1 line)
        # use `training=training`
        ffn_output = self.dropout_ffn(ffn_output, training=training)

        # apply layer normalization on sum of the output from multi-head attention (skip connection)
        # and ffn output to get the output of the encoder layer (~1 line)
        encoder_layer_out = self.layernorm2(ffn_output + skip_x_attention)  # (batch_size, input_seq_len, embedding_dim)
        # END CODE HERE

        return encoder_layer_out
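For what it's worth, the wiring described in the docstring (self-attention, then residual connection + layer norm, then feed-forward + dropout, then a second residual connection + layer norm) can be sanity-checked on its own. Below is a minimal sketch using plain Keras layers; the feed_forward helper is only a stand-in for the notebook's FullyConnected, and all dimensions are made up for illustration:

import tensorflow as tf

# Stand-in for the notebook's FullyConnected helper (assumed: Dense(relu) -> Dense)
def feed_forward(embedding_dim, fully_connected_dim):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(fully_connected_dim, activation='relu'),
        tf.keras.layers.Dense(embedding_dim),
    ])

embedding_dim, num_heads, ffn_dim = 4, 2, 8
mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)
ffn = feed_forward(embedding_dim, ffn_dim)
layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

x = tf.random.uniform((1, 3, embedding_dim))   # (batch_size, input_seq_len, embedding_dim)
attn_output = mha(x, x, x)                     # self-attention, no mask
skip = layernorm1(x + attn_output)             # first residual connection + layer norm
out = layernorm2(skip + ffn(skip))             # second residual connection + layer norm
print(out.shape)                               # (1, 3, 4) -- shape is preserved end to end

The shape staying (batch_size, input_seq_len, embedding_dim) through both sub-layers is what makes the two residual additions valid.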
# UNQ_C5 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION
class Encoder(tf.keras.layers.Layer):
    """
    The entire Encoder starts by passing the input to an embedding layer
    and using positional encoding to then pass the output through a stack of
    encoder Layers
    """
    def __init__(self, num_layers, embedding_dim, num_heads, fully_connected_dim, input_vocab_size,
                 maximum_position_encoding, dropout_rate=0.1, layernorm_eps=1e-6):
        super(Encoder, self).__init__()

        self.embedding_dim = embedding_dim
        self.num_layers = num_layers

        self.embedding = Embedding(input_vocab_size, self.embedding_dim)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.embedding_dim)

        self.enc_layers = [EncoderLayer(embedding_dim=self.embedding_dim,
                                        num_heads=num_heads,
                                        fully_connected_dim=fully_connected_dim,
                                        dropout_rate=dropout_rate,
                                        layernorm_eps=layernorm_eps)
                           for _ in range(self.num_layers)]

        self.dropout = Dropout(dropout_rate)

    def call(self, x, training, mask):
        """
        Forward pass for the Encoder

        Arguments:
            x -- Tensor of shape (batch_size, input_seq_len)
            training -- Boolean, set to true to activate
                        the training mode for dropout layers
            mask -- Boolean mask to ensure that the padding is not
                    treated as part of the input
        Returns:
            x -- Tensor of shape (batch_size, input_seq_len, embedding_dim)
        """
        seq_len = tf.shape(x)[1]

        # START CODE HERE
        # Pass input through the Embedding layer
        x = self.embedding(x)  # (batch_size, input_seq_len, embedding_dim)
        # Scale embedding by multiplying it by the square root of the embedding dimension
        x *= tf.sqrt(tf.cast(x.shape[-1], dtype=tf.float32))
        # Add the position encoding to embedding
        x += self.pos_encoding[:, :seq_len, :]
        # Pass the encoded embedding through a dropout layer
        # use `training=training`
        x = self.dropout(x, training=training)
        # Pass the output through the stack of encoding layers
        for i in range(self.num_layers):
            x = self.enc_layers[i](x)
        # END CODE HERE

        return x  # (batch_size, input_seq_len, embedding_dim)
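As a side note on the scaling step in Encoder.call: the embedding output is multiplied by sqrt(embedding_dim) before the positional-encoding slice is added. Here is a self-contained sketch of just that part; the sinusoidal encoding below is the standard sin/cos formulation and is only assumed to match the notebook's positional_encoding helper:

import numpy as np
import tensorflow as tf

# Standard sinusoidal positional encoding (assumed to match the notebook's positional_encoding)
def sinusoidal_positional_encoding(positions, d_model):
    pos = np.arange(positions)[:, np.newaxis]            # (positions, 1)
    i = np.arange(d_model)[np.newaxis, :]                # (1, d_model)
    angle_rads = pos / np.power(10000.0, (2 * (i // 2)) / np.float32(d_model))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])    # sin on even indices
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])    # cos on odd indices
    return tf.cast(angle_rads[np.newaxis, ...], tf.float32)  # (1, positions, d_model)

vocab_size, max_pos, embedding_dim = 32, 10, 4
embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
pos_encoding = sinusoidal_positional_encoding(max_pos, embedding_dim)

tokens = tf.constant([[3, 7, 1]])                        # (batch_size, input_seq_len)
seq_len = tf.shape(tokens)[1]
x = embedding(tokens)                                    # (1, 3, embedding_dim)
x *= tf.sqrt(tf.cast(embedding_dim, tf.float32))         # scale by sqrt(d_model)
x += pos_encoding[:, :seq_len, :]                        # add the positional-encoding slice
print(x.shape)                                           # (1, 3, 4)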
When I run the Encoder_test on the code above, I get:
AssertionError                            Traceback (most recent call last)
<ipython-input-...> in <module>
      2 # gilad
      3 if (not VS):
----> 4     Encoder_test(Encoder)

~/work/W4A1/public_tests.py in Encoder_test(target)
    133         [[ 0.01838917,  1.038109  , -1.6154225 ,  0.55892444],
    134          [ 0.3872563 , -0.40960154, -1.3456631 ,  1.3680083 ],
--> 135          [ 0.534565  , -0.70262754, -1.18215   ,  1.3502126 ]]]), "Wrong values case 2"
    136
    137     encoderq_output = encoderq(x, False, np.array([[[[1., 1., 1.]]], [[[1., 1., 0.]]]]))

AssertionError: Wrong values case 2