I'm not following here. Is the enc_output
generated by self.encoder
really not used in any downstream function at all?!
Also, when I use output_sentence
, it actually gives a rather long error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-26-a562b46d78e0> in <module>
1 # UNIT TEST
----> 2 Transformer_test(Transformer, create_look_ahead_mask, create_padding_mask)
~/work/W4A1/public_tests.py in Transformer_test(target, create_look_ahead_mask, create_padding_mask)
276 enc_padding_mask,
277 look_ahead_mask,
--> 278 dec_padding_mask
279 )
280
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
<ipython-input-25-b6f931f18c1e> in call(self, input_sentence, output_sentence, training, enc_padding_mask, look_ahead_mask, dec_padding_mask)
56 # call self.decoder with the appropriate arguments to get the decoder output
57 # dec_output.shape == (batch_size, tar_seq_len, fully_connected_dim)
---> 58 dec_output, attention_weights = self.decoder(input_sentence, output_sentence, training, look_ahead_mask, dec_padding_mask)
59
60 # pass decoder output through a linear layer and softmax (~2 lines)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
<ipython-input-21-f53458caddd8> in call(self, x, enc_output, training, look_ahead_mask, padding_mask)
65 # of block 1 and 2 (~1 line)
66 x, block1, block2 = self.dec_layers[i](x, enc_output, training,
---> 67 look_ahead_mask, padding_mask)
68
69 #update attention_weights dictionary with the attention weights of block 1 and block 2
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
<ipython-input-19-aacd2de2cc50> in call(self, x, enc_output, training, look_ahead_mask, padding_mask)
60 # Dropout will be applied during training
61 # Return attention scores as attn_weights_block2 (~1 line)
---> 62 mult_attn_out2, attn_weights_block2 = self.mha2(query=Q1, value=enc_output, key=enc_output, attention_mask = padding_mask, training = training, return_attention_scores=True) # (batch_size, target_seq_len, d_model)
63
64 # apply layer normalization (layernorm2) to the sum of the attention output and the output of the first block (~1 line)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/multi_head_attention.py in call(self, query, value, key, attention_mask, return_attention_scores, training)
466
467 # `key` = [B, S, N, H]
--> 468 key = self._key_dense(key)
469
470 # `value` = [B, S, N, H]
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1010 with autocast_variable.enable_auto_cast_variables(
1011 self._compute_dtype_object):
-> 1012 outputs = call_fn(inputs, *args, **kwargs)
1013
1014 if self._activity_regularizer:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/einsum_dense.py in call(self, inputs)
199
200 def call(self, inputs):
--> 201 ret = special_math_ops.einsum(self.equation, inputs, self.kernel)
202 if self.bias is not None:
203 ret += self.bias
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/special_math_ops.py in einsum(equation, *inputs, **kwargs)
749 - number of inputs or their shapes are inconsistent with `equation`.
750 """
--> 751 return _einsum_v2(equation, *inputs, **kwargs)
752
753
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/special_math_ops.py in _einsum_v2(equation, *inputs, **kwargs)
1178 if ellipsis_label:
1179 resolved_equation = resolved_equation.replace(ellipsis_label, '...')
-> 1180 return gen_linalg_ops.einsum(inputs, resolved_equation)
1181
1182 # Send fully specified shapes to opt_einsum, since it cannot handle unknown
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/gen_linalg_ops.py in einsum(inputs, equation, name)
1074 return _result
1075 except _core._NotOkStatusException as e:
-> 1076 _ops.raise_from_not_ok_status(e, name)
1077 except _core._FallbackException:
1078 pass
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6860 message = e.message + (" name: " + name if name is not None else "")
6861 # pylint: disable=protected-access
-> 6862 six.raise_from(core._status_to_exception(e.code, message), None)
6863 # pylint: enable=protected-access
6864
/opt/conda/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: cannot compute Einsum as input #1(zero-based) was expected to be a int64 tensor but is a float tensor [Op:Einsum]