I am having trouble getting my Decoder implementation to pass the unit test in Exercise 7.
My traceback looks like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-423-dd4b65c051b4> in <module>
1 # UNIT TEST
----> 2 Decoder_test(Decoder, create_look_ahead_mask, create_padding_mask)
~/work/W4A1/public_tests.py in Decoder_test(target, create_look_ahead_mask, create_padding_mask)
218 target_vocab_size,
219 maximum_position_encoding)
--> 220 x, attention_weights = decoderk(x_array, encoderq_output, False, look_ahead_mask, None)
221 assert tf.is_tensor(x), "Wrong type for x. It must be a dict"
222 assert np.allclose(tf.shape(x), tf.shape(encoderq_output)), f"Wrong shape. We expected { tf.shape(encoderq_output)}"
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
<ipython-input-422-52b03a9899fd> in call(self, x, enc_output, training, look_ahead_mask, padding_mask)
68 print("look_ahead_mask is", look_ahead_mask)
69 print("padding_mask is", padding_mask)
---> 70 x, block1, block2 = self.dec_layers[i](x, enc_output, enc_output, look_ahead_mask, padding_mask)
71
72 #update attention_weights dictionary with the attention weights of block 1 and block 2
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
<ipython-input-420-1648c8091ba9> in call(self, x, enc_output, training, look_ahead_mask, padding_mask)
51 # calculate self-attention and return attention scores as attn_weights_block1.
52 # Dropout will be applied during training (~1 line).
---> 53 mult_attn_out1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask, return_attention_scores=True) # (batch_size, target_seq_len, embedding_dim)
54
55 # apply layer normalization (layernorm1) to the sum of the attention output and the input (~1 line)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/multi_head_attention.py in call(self, query, value, key, attention_mask, return_attention_scores, training)
472
473 attention_output, attention_scores = self._compute_attention(
--> 474 query, key, value, attention_mask, training)
475 attention_output = self._output_dense(attention_output)
476
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/multi_head_attention.py in _compute_attention(self, query, key, value, attention_mask, training)
441 # seem a bit unusual, but is taken from the original Transformer paper.
442 attention_scores_dropout = self._dropout_layer(
--> 443 attention_scores, training=training)
444
445 # `context_layer` = [B, T, N, H]
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/core.py in call(self, inputs, training)
229
230 output = control_flow_util.smart_cond(training, dropped_inputs,
--> 231 lambda: array_ops.identity(inputs))
232 return output
233
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/control_flow_util.py in smart_cond(pred, true_fn, false_fn, name)
113 pred, true_fn=true_fn, false_fn=false_fn, name=name)
114 return smart_module.smart_cond(
--> 115 pred, true_fn=true_fn, false_fn=false_fn, name=name)
116
117
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/smart_cond.py in smart_cond(pred, true_fn, false_fn, name)
51 pred_value = smart_constant_value(pred)
52 if pred_value is not None:
---> 53 if pred_value:
54 return true_fn()
55 else:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I noticed that when I print padding_mask at the very beginning of the Decoder's call() method, it is None. I want to confirm whether that is expected. If it is, I am unsure how to proceed in debugging this ValueError.