Handwritten Image Recognition From Scratch


I have been successful with a 2 layer network, but whenever I increase the network size with additional layers, my predictor always predicts ‘1’.

I don’t know whether the mistake is coming from the forward prop or backward prop.

Please see my code for a 3 layer network:

def forward_prop(X_train, W1, b1, W2, b2, W3, b3):
    Z1 = np.dot(W1, X_train) + b1
    A1 = ReLU(Z1)

    Z2 = np.dot(W2, A1) + b2
    A2 = ReLU(Z2)

    Z3 = np.dot(W3, A2) + b3
    A3 = softmax2(Z3)

    return Z1, A1, Z2, A2, Z3, A3

def back_prop(X_train, Y_train, Z1, A1, Z2, A2, W1, W2, W3, A3):
    m = Y_train.size
    one_hot_Y = one_hot(Y_train)

    dZ3 = A3 - one_hot_Y
    dW3 = 1/m * np.dot(dZ3, A2.T)
    db3 = 1/m * np.sum(dZ3, axis=1, keepdims=True)

    dZ2 = np.dot(W3.T, dZ3) * deriv_ReLU(Z2)
    dW2 = 1/m * np.dot(dZ2, A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)

    dZ1 = np.dot(W2.T, dZ2) * deriv_ReLU(Z1)
    dW1 = 1/m * np.dot(dZ1, X_train.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3


I do not know how to compute the cost function. If anyone can help, that would be great.

Hi, Matthew.

It’s great that you are doing this experiment! You always learn useful things when you apply what you’ve learned to a new problem.

I actually did a similar exercise using the MNIST dataset, but it was a couple of years back. I’ll try to remember how I approached it.

It turns out that softmax is the multiclass generalization of sigmoid and the math is very similar. To understand more and see the derivation of the cost functions, what better way to start than watching Prof Geoff Hinton’s lecture on that subject! :nerd_face: Please have a look and then we can discuss more if you like.
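To make the sigmoid connection concrete, here's a quick sketch you can run (the function names and the max-subtraction stability trick are just my choices, not anything from the course code). For two classes with logits (z, 0), the softmax probability of the first class works out to exactly sigmoid(z):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softmax(z):
    # Subtracting the max before exponentiating avoids overflow
    # without changing the result.
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

# softmax over logits (z, 0): first component is e^z / (e^z + 1),
# which is algebraically the same as 1 / (1 + e^-z) = sigmoid(z).
z = 1.7
print(softmax(np.array([z, 0.0]))[0])
print(sigmoid(z))
```

That's the sense in which softmax generalizes sigmoid: sigmoid is just the two-class case with one logit pinned to zero.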

One other general suggestion: using the “hard-coded” style of implementation that they show here in Course 2 is a bit limiting. Anytime you want to try something different, you have to change the code. We already have the fully general implementation from Course 1 Week 4. It’s a little more work to start with that, but it gives some great benefits: if you want to try a different number of layers, you just make a new layers_dims array and you’re good to go. You would just need to add the logic to include softmax as one of the activation choices and deal with the differences in the loss function. Just a thought.

Of course it’s a completely reasonable method to start by getting things to work with the “hard-coded” approach and then decide later whether to go for “full generality” — you’ll have all the component parts you need. And now that I think about it, it’s easier to debug with the hard-coded approach, since you don’t have all the layers of subroutines to deal with.

Before starting to debug the question of why your predictions are always 1, it makes sense to get the cost logic sorted. Then see where that gets you.
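On the cost itself: for softmax outputs the standard choice is the categorical cross-entropy, averaged over the m examples. Here's a minimal sketch, assuming A3 and one_hot_Y both have shape (num_classes, m) as in your code; the function name and the eps clipping constant are my own additions:

```python
import numpy as np

def compute_cost(A3, one_hot_Y, eps=1e-12):
    # Categorical cross-entropy for softmax outputs.
    # A3 and one_hot_Y both have shape (num_classes, m).
    m = one_hot_Y.shape[1]
    # Clip probabilities away from 0 so np.log never sees an exact zero.
    log_probs = np.log(np.clip(A3, eps, 1.0))
    # Only the log-probability of the true class in each column survives
    # the elementwise product with the one-hot labels.
    return -np.sum(one_hot_Y * log_probs) / m
```

This pairs with the dZ3 = A3 - one_hot_Y line in your back_prop: that simple expression is exactly the gradient of this cost through the softmax.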


Thank you very much for the advice. I was finally able to find the problem, which was caused during initialisation: my weights were far too small.

I have another question though:

def softmax(X):
    expo = np.exp(X)
    expo_sum = np.sum(np.exp(X))
    return expo / expo_sum

def softmax2(z):
    s = np.exp(z) / sum(np.exp(z))
    return s

softmax2 outperforms softmax. The only difference is sum vs np.sum. I was under the impression that np.sum was correct, but for some reason sum gives FAR greater accuracy when training.

My first thought was that “sum” must somehow be an alias for np.sum, but that's not quite it: Python's built-in sum() does work on a numpy array. It iterates over the first axis and adds the rows elementwise, which effectively gives you np.sum with axis = 0.

My guess is that the real issue is that you are being careless by not specifying the axis in your np.sum call. Without an axis, np.sum(np.exp(X)) collapses the whole array to a single scalar, which is definitely an error here: for softmax you want a separate sum for each column (each training example).

I suggest you print the intermediate values of np.sum(np.exp(Z)) and sum(np.exp(Z)) and compare them. If the two give different results, the obvious next step is to investigate why they are different.

Python is an interactive language. You don’t have to wonder what something might do. You can try it and see. Watch this:

import numpy as np

np.random.seed(42)  # makes the random values below reproducible
Z = np.random.randn(3, 4)
print(f"Z = {Z}")
Znpsum = np.sum(Z)
print(f"Znpsum = {Znpsum}")
Zsum = sum(Z)
print(f"Zsum = {Zsum}")
Znpsumaxis = np.sum(Z, axis=0, keepdims=True)
print(f"Znpsumaxis = {Znpsumaxis}")

Running that gives this result:

Z = [[ 0.49671415 -0.1382643   0.64768854  1.52302986]
 [-0.23415337 -0.23413696  1.57921282  0.76743473]
 [-0.46947439  0.54256004 -0.46341769 -0.46572975]]
Znpsum = 3.5514636706048432
Zsum = [-0.20691361  0.17015879  1.76348366  1.82473483]
Znpsumaxis = [[-0.20691361  0.17015879  1.76348366  1.82473483]]

So you just got lucky: the built-in sum iterates over the first axis of the array, which is equivalent to np.sum with axis = 0, and that happens to be exactly what you want here. But I think you’d be better off writing it explicitly and also including the keepdims, just to be sure you get what you want. Notice that in the above output Zsum and Znpsumaxis are not the same: they have the same values, but the first is a 1D array and the second is a 2D array. It’s a bit subtle, but you have to train yourself to pay attention to the square brackets. Those are significant.
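To make that concrete, here's how I'd write the softmax with the axis and keepdims spelled out. The max-subtraction is an extra numerical-stability trick I've added, not something your original code had:

```python
import numpy as np

def softmax(Z):
    # Z has shape (num_classes, m): one column per training example.
    # Subtracting the column-wise max avoids overflow in np.exp
    # without changing the result.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    expo = np.exp(shifted)
    # axis=0 sums over the classes within each column;
    # keepdims=True keeps the sum 2D so the division broadcasts
    # column by column, exactly as intended.
    return expo / np.sum(expo, axis=0, keepdims=True)
```

With the axis and keepdims written out, there's no guessing about what shape the intermediate sum has: every column of the output sums to 1.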