Hello guys,
During the training my loss goes to minus values. This is my training step function but it shouldnt be the cause:
@tf.function()
def train_step(generated_image):
with tf.GradientTape() as tape:
# In this function you must use the precomputed encoded images a_S and a_C
# Compute a_G as the vgg_model_outputs for the current generated image
### START CODE HERE
#(1 line)
a_G = vgg_model_outputs(generated_image)
# Compute the style cost
#(1 line)
J_style = compute_style_cost(a_S, a_G)
#(2 lines)
# Compute the content cost
J_content = compute_content_cost(a_C, a_G)
# Compute the total cost
J = total_cost(J_content, J_style)
### END CODE HERE
grad = tape.gradient(J, generated_image)
optimizer.apply_gradients([(grad, generated_image)])
generated_image.assign(clip_0_1(generated_image))
# For grading purposes
return J
Test results:
tf.Tensor(-128130720.0, shape=(), dtype=float32)
AssertionError: Unexpected cost for epoch 0: -128130720.0 != 10221.168
I think it should be connected with the style loss as I already had some problems with it before and when I change style loss value to some fixed number minus loss values disappear.
What is interesting is that during the style loss test I get the expected result, that is 14.017805.
This is my code for the style layer cost:
def compute_layer_style_cost(a_S, a_G):
“”"
Arguments:
a_S – tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
a_G – tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G
Returns:
J_style_layer -- tensor representing a scalar value, style cost defined above by equation (2)
"""
# Retrieve dimensions from a_G (≈1 line)
m, n_H, n_W, n_C = a_G.get_shape().as_list()
a_S = tf.reshape(tf.transpose(a_S, perm=[3,1,2,0]), shape=[n_C, n_H * n_W])
a_G = tf.reshape(tf.transpose(a_G, perm=[3,1,2,0]), shape=[n_C, n_H * n_W])
# Computing gram_matrices for both images S and G (≈2 lines)
GS = gram_matrix(a_S)
GG = gram_matrix(a_G)
# Computing the loss (≈1 line)
J_style_layer_1 = 1/(4 * tf.square(n_C) * tf.square(n_H * n_W))
J_style_layer_2 = tf.reduce_sum(tf.reduce_sum(tf.square(tf.subtract(GS, GG))))
J_style_layer_2 = tf.cast(J_style_layer_2, tf.float32)
J_style_layer_1 = tf.cast(J_style_layer_1, tf.float32)
J_style_layer = tf.math.multiply(J_style_layer_1, J_style_layer_2)
### END CODE HERE
return J_style_layer
Without the cast rows:
J_style_layer_2 = tf.cast(J_style_layer_2, tf.float32)
J_style_layer_1 = tf.cast(J_style_layer_1, tf.float32)
it gives me error on test saying:
InvalidArgumentError: cannot compute Mul as input #1(zero-based) was expected to be a double tensor but is a float tensor [Op:Mul]
It would be nice if someone could help me in this.
Best regards,
Roberts