# Why TensorFlow does not give the same result

Hello

I tried to replicate the previous assignment (Course 2 - Week 01 - Assignment 01) using TensorFlow. However, the result is not similar to the result in Assignment 01, and it also does not converge. I hope to get some help from fellows on:

1. Why does the result not converge, given the same setup as in assignment 1 (W1)?
2. Why is the result not similar?

Here is the complete code:

Cell 1/7

``````import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import tensorflow as tf
from public_tests import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

``````

Cell 2/7

``````train_X, train_Y, test_X, test_Y = load_dataset()
``````

Cell 3/7

``````def forward_propagation(X, parameters):

    # retrieve parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    Z1 = W1 @ X + b1
    A1 = tf.keras.activations.relu(Z1)
    Z2 = W2 @ A1 + b2
    A2 = tf.keras.activations.relu(Z2)
    Z3 = W3 @ A2 + b3
    A3 = tf.keras.activations.sigmoid(Z3)

    return A3
``````

Cell 4/7

``````def compute_loss(A3, Y):
    m = tf.cast(tf.shape(Y)[1], tf.float32)  # Ensure the shape parameter is a float type for division
    loss = (-1. / m) * tf.reduce_sum(Y * tf.math.log(A3 + 1e-9) + (1 - Y) * tf.math.log(1 - A3 + 1e-9))
    loss = tf.squeeze(loss)  # Removes dimensions of size 1 from the shape of the tensor.
    return loss
``````

Cell 5/7

``````def initialize_parameters_random_tf(layers_dims):
    np.random.seed(3)  # Setting the seed for reproducibility
    parameters = {}
    L = len(layers_dims)  # number of layers in the network

    for l in range(1, L):
        # Using np.random.randn for initialization
        W = np.random.randn(layers_dims[l], layers_dims[l-1]).astype(np.float32) * 10
        b = np.zeros((layers_dims[l], 1), dtype=np.float32)

        # Converting to tf.Variable for TensorFlow compatibility
        parameters[f'W{l}'] = tf.Variable(W, trainable=True)
        parameters[f'b{l}'] = tf.Variable(b, trainable=True)

    return parameters
``````

Cell 6/7

``````def fit_check(X, Y, epochs=15000):

    # Step 01: Variables setup:
    optimizer = tf.keras.optimizers.SGD(0.01)

    X = tf.convert_to_tensor(X, dtype=tf.float32)
    Y = tf.convert_to_tensor(Y, dtype=tf.float32)

    parameters = initialize_parameters_random_tf([X.shape[0], 10, 5, 1])
    trainable_variables = list(parameters.values())

    # Step 02: Iteration
    for epoch in range(epochs):

        # Forward propagation and loss, inside the tape so gradients can be traced:
        with tf.GradientTape() as tape:
            A3 = forward_propagation(X, parameters)
            loss = compute_loss(A3, Y)

        # Calculate gradients with respect to the parameters
        grads = tape.gradient(loss, trainable_variables)

        # Update the parameters
        optimizer.apply_gradients(zip(grads, trainable_variables))

        if epoch % 1000 == 0:
            print(f"Epoch {epoch}, Loss: {loss.numpy()}")
``````

Cell 7/7

``````fit_check(train_X, train_Y)
``````

Here is the result I get

``````Epoch 0, Loss: 10.361632347106934
Epoch 1000, Loss: 10.361632347106934
Epoch 2000, Loss: 10.361632347106934
Epoch 3000, Loss: 10.361632347106934
Epoch 4000, Loss: 10.361632347106934
Epoch 5000, Loss: 10.361632347106934
Epoch 6000, Loss: 10.361632347106934
Epoch 7000, Loss: 10.361632347106934
Epoch 8000, Loss: 10.361632347106934
Epoch 9000, Loss: 10.361632347106934
Epoch 10000, Loss: 10.361632347106934
Epoch 11000, Loss: 10.361632347106934
Epoch 12000, Loss: 10.361632347106934
Epoch 13000, Loss: 10.361632347106934
Epoch 14000, Loss: 10.361632347106934
``````

If you're doing this in TensorFlow, you don't need functions like forward_propagation() or iterations or gradient tape.

You just create a model with the layers you want to use - like tfl.Dense(…) - with a specific activation.

Then you compile and fit the model to some data set.

TensorFlow does all of the detailed work for you.
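That compile-and-fit workflow can be sketched roughly like this (a sketch, not the assignment's code: the layer sizes mirror the manual version, and `train_X`/`train_Y` are assumed to be in the lab's features-first layout, hence the transposes in the commented `fit` call):

```python
import numpy as np
import tensorflow as tf

# Same 10 -> 5 -> 1 architecture as the manual forward_propagation,
# but expressed as Keras layers; compile()/fit() handle gradients and updates.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(5, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer=tf.keras.optimizers.SGD(0.01),
              loss=tf.keras.losses.BinaryCrossentropy())

# Keras expects samples-first data, so the (features, m) lab arrays
# would be transposed before fitting:
# model.fit(train_X.T, train_Y.T, epochs=100)
```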


Thank you for your reply. I just finished Specialization 02, so that is the best I can do.

However, my main concern is that the TensorFlow code and the code in the assignment are kept similar, yet they give totally different outputs. That is the issue I am mainly looking into.


Well, there are several things that you are doing differently than the code you copied from the TensorFlow Introduction exercise:

1. You are not using a real TF cost function. You could use the TF `BinaryCrossentropy` loss function there. But note that you have to play the same games to deal with the fact that the TF functions all expect "samples first" data orientation. But I would expect they can compute gradients of your loss function as well.
2. You are handling the elements of the `parameters` dictionary differently by using a list of the "values()" in the dictionary. They get specific references to the entries of the dictionary and use those. There are some subtleties about how object references in python work. It looks like the fundamental problem is that your parameters are just not getting updated, which is why I'm pointing out this difference. Not sure it's the real cause of the issue, but something to consider carefully.
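On point 1, here is a minimal sketch of swapping in the built-in loss (illustrative values only; the transposes account for the lab's (features, m) orientation versus the samples-first layout TF expects):

```python
import tensorflow as tf

# BinaryCrossentropy averages over samples and expects samples-first
# tensors, while the lab keeps data as (1, m) - hence the transposes.
bce = tf.keras.losses.BinaryCrossentropy()

A3 = tf.constant([[0.9, 0.2, 0.8]])  # predictions, shape (1, m)
Y = tf.constant([[1.0, 0.0, 1.0]])   # labels, shape (1, m)

loss = bce(tf.transpose(Y), tf.transpose(A3))
```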

Here's a thread which talks about some of the pitfalls with object references. But note that it's warning you about the exact opposite scenario: in your case here you want to be referencing the global objects that are the elements of the `parameters` dictionary. In the case discussed on that other thread, the point is you want to break the link between the global values and the values you are modifying locally.


But the fundamental point here is that this is still very "early days" in terms of how to use TF. It might not be a productive use of your time to figure out this particular case. The other way to invest your time would be to proceed with DLS Course 4 and really get more exposure to how to use TF to implement models.


Hello @quoc

From your work, you are definitely better than that.

Don't invent this `10`. The lab used `np.sqrt(2 / layers_dims[l-1])`.

Cheers,
Raymond


I did not invent that. The instructions ask to scale by 10.


Ok, I did the experiment and my theory 2) above is not the problem. Here's a test cell that uses the python "`id()`" function to show how object references work w.r.t. dictionaries, a `copy()` of a dictionary and a `deepcopy` of a dictionary. Then I added a few more steps to show that taking the list of the values still gives you a reference to the actual entries in the dictionary.

To understand the output that follows, note that the output of `id()` is essentially the address of the object in memory translated to a decimal number. So if the ids are the same, it means that both variables point to the same actual object in memory. If they are different, then it's a separate copy of the object.

Here's the test cell:

``````# Experiment with copy and deepcopy
import copy  # needed for copy.deepcopy below

np.random.seed(42)
A = np.random.randint(0, 10, (3,4))
print(f"A = {A}")
print(f"id(A) = {id(A)}")
B = A
print(f"id(B) = {id(B)}")
origDict = {}
print(f"id(origDict) = {id(origDict)}")
origDict["A"] = A
print(f"id(origDict) = {id(origDict)}")
B = origDict["A"]
print(f"id(B) = {id(B)}")
copyDict = origDict.copy()
print(f"id(copyDict) = {id(copyDict)}")
copyB = copyDict["A"]
print(f"id(copyB) = {id(copyB)}")
deepcopyDict = copy.deepcopy(origDict)
print(f"id(deepcopyDict) = {id(deepcopyDict)}")
deepcopyB = deepcopyDict["A"]
print(f"id(deepcopyB) = {id(deepcopyB)}")
listOrig = list(origDict.values())
print(f"type(listOrig) {type(listOrig)}")
print(f"id(listOrig) {id(listOrig)}")
listCopyA = listOrig[0]
print(f"type(listCopyA) {type(listCopyA)}")
print(f"id(listCopyA) {id(listCopyA)}")
``````

And here is the output I get when I run that:

``````A = [[6 3 7 4]
[6 9 2 6]
[7 4 3 7]]
id(A) = 132417685968176
id(B) = 132417685968176
id(origDict) = 132417685967648
id(origDict) = 132417685967648
id(B) = 132417685968176
id(copyDict) = 132417686207136
id(copyB) = 132417685968176
id(deepcopyDict) = 132417685967408
id(deepcopyB) = 132417685968896
type(listOrig) <class 'list'>
id(listOrig) 132417685969008
type(listCopyA) <class 'numpy.ndarray'>
id(listCopyA) 132417685968176
``````

You really have to look at all that in detail and follow the meaning of each step to "get" what it all means, but the bottom line is that the reference `listCopyA` does actually point to the real object that is indexed by the dictionary.

So this is all educational, but the net result is that it doesn't explain anything about why your code generates a different result. Whatever this issue actually is, my theory 2) was not it.


Oh, @quoc, my bad. I take my words back.

I forgot that the lab had demoed a bad case with `10`.

Please check out `initialize_parameters_he`, and use the multiplication factor there instead of `10`; then your code should work fine.

Cheers,
Raymond
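For reference, a sketch of what that looks like: the He-style factor from `initialize_parameters_he` replaces the `* 10`, with the rest of the posted initializer kept unchanged (the function name here is just illustrative):

```python
import numpy as np

def initialize_he(layers_dims, seed=3):
    """He-style initialization: scale by sqrt(2 / fan_in) instead of 10."""
    np.random.seed(seed)  # for reproducibility, as in the posted code
    parameters = {}
    for l in range(1, len(layers_dims)):
        parameters[f"W{l}"] = (np.random.randn(layers_dims[l], layers_dims[l - 1])
                               * np.sqrt(2.0 / layers_dims[l - 1])).astype(np.float32)
        parameters[f"b{l}"] = np.zeros((layers_dims[l], 1), dtype=np.float32)
    return parameters
```

With this factor the weights start small enough that the sigmoid output layer is not saturated, so the gradients are non-zero and the loss can actually move.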

Hello @quoc,

Please let me know if your code is still not working after changing the multiplication factor as I said, okay?

Cheers,
Raymond