In Exercise 5, in the next_word function, I keep getting the error below even though all the unit tests pass.
Here is the error message:
@Deepti_Prasad
Training set example:
[SOS] amanda: i baked cookies. do you want some? jerry: sure! amanda: i’ll bring you tomorrow [EOS]
Human written summary:
[SOS] amanda baked cookies and will bring jerry some tomorrow. [EOS]
Model written summary:
2024-08-31 20:27:40.199226: W tensorflow/core/framework/op_kernel.cc:1816] INVALID_ARGUMENT: required broadcastable shapes
InvalidArgumentError Traceback (most recent call last)
Cell In[204], line 9
7 print(summary[training_set_example])
8 print('\nModel written summary:')
----> 9 summarize(transformer, document[training_set_example])
Cell In[203], line 16, in summarize(model, input_document)
13 output = tf.expand_dims([tokenizer.word_index["[SOS]"]], 0)
15 for i in range(decoder_maxlen):
---> 16 predicted_id = next_word(model, encoder_input, output)
17 output = tf.concat([output, predicted_id], axis=-1)
19 if predicted_id == tokenizer.word_index["[EOS]"]:
Cell In[198], line 20, in next_word(model, encoder_input, output)
17 dec_padding_mask = create_padding_mask(output)
19 # Run the prediction of the next word with the transformer model
---> 20 predictions, attention_weights = model(input_sentence=encoder_input,
21 output_sentence=output,
22 training=False,
23 enc_padding_mask=enc_padding_mask,
24 look_ahead_mask=look_ahead_mask,
25 dec_padding_mask=dec_padding_mask
26 )
27 ### END CODE HERE ###
29 predictions = predictions[: ,-1:, :]
File /usr/local/lib/python3.8/dist-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # tf.debugging.disable_traceback_filtering()
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
Cell In[188], line 57, in Transformer.call(self, input_sentence, output_sentence, training, enc_padding_mask, look_ahead_mask, dec_padding_mask)
53 enc_output = self.encoder(input_sentence, training, enc_padding_mask)
55 # call self.decoder with the appropriate arguments to get the decoder output
56 # dec_output.shape == (batch_size, tar_seq_len, fully_connected_dim)
---> 57 dec_output, attention_weights = self.decoder(output_sentence, enc_output, training, look_ahead_mask, dec_padding_mask)
59 # pass decoder output through a linear layer and softmax (~1 line)
60 final_output = self.final_layer(dec_output)
Cell In[183], line 66, in Decoder.call(self, x, enc_output, training, look_ahead_mask, padding_mask)
62 # use a for loop to pass x through a stack of decoder layers and update attention_weights (~4 lines total)
63 for i in range(self.num_layers):
64 # pass x and the encoder output through a stack of decoder layers and save the attention weights
65 # of block 1 and 2 (~1 line)
---> 66 x, block1, block2 = self.dec_layers[i](x, enc_output, training, look_ahead_mask, padding_mask)
68 # update attention_weights dictionary with the attention weights of block 1 and block 2
69 attention_weights['decoder_layer{}_block1_self_att'.format(i+1)] = block1
Cell In[176], line 67, in DecoderLayer.call(self, x, enc_output, training, look_ahead_mask, padding_mask)
61 Q1 = self.layernorm1(x + mult_attn_out1)
63 # BLOCK 2
64 # calculate self-attention using the Q from the first block and K and V from the encoder output.
65 # Dropout will be applied during training
66 # Return attention scores as attn_weights_block2 (~1 line)
---> 67 mult_attn_out2, attn_weights_block2 = self.mha2(Q1, enc_output, enc_output, attention_mask=padding_mask, return_attention_scores=True)
69 # # apply layer normalization (layernorm2) to the sum of the attention output and the Q from the first block (~1 line)
70 mult_attn_out2 = self.layernorm2(Q1 + mult_attn_out2)
InvalidArgumentError: Exception encountered when calling layer 'softmax_87' (type Softmax).
{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:AddV2] name:
Call arguments received by layer 'softmax_87' (type Softmax):
• inputs=tf.Tensor(shape=(1, 2, 2, 150), dtype=float32)
• mask=tf.Tensor(shape=(1, 1, 1, 2), dtype=float32)
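
If I'm reading those last two lines correctly, the softmax inside the decoder's second attention block gets scores of shape (1, 2, 2, 150), i.e. (batch, heads, query_len, key_len) with the 150 keys coming from the encoder output, while the attention mask has shape (1, 1, 1, 2), whose last dimension matches the length of my partially decoded output instead. (The first loop iteration slips through because a mask of shape (1, 1, 1, 1) broadcasts against anything.) A minimal snippet reproducing the broadcast failure, with the shapes copied from the error (the -1e9 masking constant is just the usual scaled dot-product convention, not something specific to my notebook):

import tensorflow as tf

scores = tf.zeros((1, 2, 2, 150))  # (batch, heads, q_len, k_len): keys come from the encoder
mask = tf.zeros((1, 1, 1, 2))      # built from `output`, so its last dim is the decoder length
scores + mask * -1e9               # InvalidArgumentError: required broadcastable shapes [Op:AddV2]

So my current guess is that in next_word the dec_padding_mask has to be built from encoder_input (block 2 attends over the encoder output, so the mask must cover the encoder keys), not from output as I have it on line 17 of that cell. Here is a sketch of what I think the mask setup should look like, assuming the notebook's create_padding_mask and create_look_ahead_mask helpers:

def next_word(model, encoder_input, output):
    # Padding mask over the encoder input, shape (1, 1, 1, enc_seq_len)
    enc_padding_mask = create_padding_mask(encoder_input)
    # Look-ahead mask for the decoder's self-attention (block 1)
    look_ahead_mask = create_look_ahead_mask(tf.shape(output)[1])
    # Block 2 attends over the ENCODER output, so its padding mask
    # must also be derived from encoder_input, not from `output`
    dec_padding_mask = create_padding_mask(encoder_input)

    predictions, attention_weights = model(
        input_sentence=encoder_input,
        output_sentence=output,
        training=False,
        enc_padding_mask=enc_padding_mask,
        look_ahead_mask=look_ahead_mask,
        dec_padding_mask=dec_padding_mask,
    )

    # Keep only the logits for the last position and pick the most likely token
    predictions = predictions[:, -1:, :]
    predicted_id = tf.argmax(predictions, axis=-1)
    return tf.cast(predicted_id, tf.int32)

Is that the right diagnosis, or is something else going on?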