Hi @Deepti_Prasad, @paulinpaloalto,
It's not the same problem. I can give another stack trace, from the ungraded part of the same exercise:
```
Training set example:
[SOS] amanda: i baked cookies. do you want some? jerry: sure! amanda: i’ll bring you tomorrow
[EOS]
Human written summary:
[SOS] amanda baked cookies and will bring jerry some tomorrow. [EOS]
Model written summary:
2026-01-01 05:50:44.849106: W tensorflow/core/framework/op_kernel.cc:1816] INVALID_ARGUMENT: required broadcastable shapes
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
Cell In[33], line 9
7 print(summary[training_set_example])
8 print('\nModel written summary:')
----> 9 summarize(transformer, document[training_set_example])
Cell In[32], line 16, in summarize(model, input_document)
13 output = tf.expand_dims([tokenizer.word_index["[SOS]"]], 0)
15 for i in range(decoder_maxlen):
---> 16 predicted_id = next_word(model, encoder_input, output)
17 output = tf.concat([output, predicted_id], axis=-1)
19 if predicted_id == tokenizer.word_index["[EOS]"]:
Cell In[29], line 20, in next_word(model, encoder_input, output)
17 dec_padding_mask = create_padding_mask(output)
19 # Run the prediction of the next word with the transformer model
---> 20 predictions, attention_weights = model(
21 encoder_input,
22 output,
23 True,
24 enc_padding_mask,
25 look_ahead_mask,
26 dec_padding_mask
27 )
28 ### END CODE HERE ###
30 predictions = predictions[: ,-1:, :]
File /usr/local/lib/python3.8/dist-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
Cell In[21], line 57, in Transformer.call(self, input_sentence, output_sentence, training, enc_padding_mask, look_ahead_mask, dec_padding_mask)
53 enc_output = self.encoder(input_sentence, training, enc_padding_mask)
55 # call self.decoder with the appropriate arguments to get the decoder output
56 # dec_output.shape == (batch_size, tar_seq_len, fully_connected_dim)
---> 57 dec_output, attention_weights = self.decoder(output_sentence, enc_output, training, look_ahead_mask, dec_padding_mask)
59 # pass decoder output through a linear layer and softmax (~1 line)
60 final_output = self.final_layer(dec_output)
Cell In[18], line 66, in Decoder.call(self, x, enc_output, training, look_ahead_mask, padding_mask)
62 # use a for loop to pass x through a stack of decoder layers and update attention_weights (~4 lines total)
63 for i in range(self.num_layers):
64 # pass x and the encoder output through a stack of decoder layers and save the attention weights
65 # of block 1 and 2 (~1 line)
---> 66 x, block1, block2 = self.dec_layers[i](x, enc_output, training, look_ahead_mask, padding_mask)
67 #update attention_weights dictionary with the attention weights of block 1 and block 2
68 attention_weights['decoder_layer{}_block1_self_att'.format(i+1)] = block1
Cell In[15], line 67, in DecoderLayer.call(self, x, enc_output, training, look_ahead_mask, padding_mask)
61 Q1 = self.layernorm1(x + mult_attn_out1)
63 # BLOCK 2
64 # calculate self-attention using the Q from the first block and K and V from the encoder output.
65 # Dropout will be applied during training
66 # Return attention scores as attn_weights_block2 (~1 line)
---> 67 mult_attn_out2, attn_weights_block2 = self.mha2(Q1, enc_output, enc_output, padding_mask, return_attention_scores = True)
69 # # apply layer normalization (layernorm2) to the sum of the attention output and the Q from the first block (~1 line)
70 mult_attn_out2 = self.layernorm2(Q1 + mult_attn_out2)
InvalidArgumentError: Exception encountered when calling layer 'softmax_58' (type Softmax).
{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:AddV2] name:
Call arguments received by layer 'softmax_58' (type Softmax):
• inputs=tf.Tensor(shape=(1, 2, 2, 150), dtype=float32)
• mask=tf.Tensor(shape=(1, 1, 1, 2), dtype=float32)
```