C2_W4_Assignment test_back_prop fails by b1 value

Hi @sugaprho

This issue is pretty common, so here are the values that you can check against (in your case, the last part):

Check the inputs


x.shape:

(5778, 4)

x.values:

array([[0.  , 0.  , 0.25, 0.25],
       [0.  , 0.25, 0.25, 0.  ],
       [0.  , 0.  , 0.  , 0.  ],
       ...,
       [0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  ]])

yhat.shape:

(5778, 4)

yhat.values:

array([[5.65860316e-06, 6.30488530e-06, 1.05841040e-05, 5.18449964e-06],
       [1.29186998e-04, 1.37714283e-04, 1.02187460e-04, 8.04556333e-05],
       [2.09992073e-06, 2.04610255e-06, 1.93481769e-06, 2.22081401e-06],
       ...,
       [9.18385839e-05, 1.17760693e-04, 1.47190763e-04, 1.63875601e-04],
       [3.92654489e-06, 3.05574159e-06, 4.32752127e-06, 4.67408286e-06],
       [7.41423404e-05, 1.31285979e-04, 1.57982820e-04, 1.25570871e-04]])

y.shape:

(5778, 4)

y.values:

array([[0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

W1.shape:

(50, 5778)

W1.values:

array([[4.17022005e-01, 7.20324493e-01, 1.14374817e-04, ...,
        3.18453337e-02, 8.69477233e-03, 3.58537645e-01],
       [3.06338212e-02, 5.17847453e-01, 3.55390080e-03, ...,
        3.18255407e-01, 8.19716276e-01, 5.92131246e-01],
       [3.72634224e-02, 7.78301760e-01, 2.58389111e-01, ...,
        6.59538833e-01, 6.19214356e-01, 5.70114010e-01],
       ...,
       [3.00697829e-01, 5.34150368e-01, 9.82730945e-01, ...,
        3.50735793e-02, 8.00820428e-01, 7.97962643e-01],
       [1.04441188e-02, 3.75743267e-01, 8.00548200e-01, ...,
        9.53145499e-01, 3.97463811e-01, 2.59193158e-01],
       [9.40443797e-01, 6.20337024e-01, 3.49636080e-01, ...,
        2.12197620e-01, 7.34165670e-01, 4.04981356e-01]])

W2.shape:

(5778, 50)

W2.values:

array([[0.98756632, 0.26921735, 0.79967476, ..., 0.7092524 , 0.26348048,
        0.91307305],
       [0.32481516, 0.49150709, 0.93396834, ..., 0.87124107, 0.66809605,
        0.59724161],
       [0.33875982, 0.97059413, 0.71778828, ..., 0.50609534, 0.74589904,
        0.17921762],
       ...,
       [0.26514628, 0.12376321, 0.04835613, ..., 0.51204125, 0.82800244,
        0.83260737],
       [0.43858813, 0.22220155, 0.23391658, ..., 0.44519868, 0.31930962,
        0.41111395],
       [0.3361489 , 0.37257524, 0.84969136, ..., 0.22725725, 0.14900807,
        0.37457519]])

b1.shape:

(50, 1)

b1.values:

array([[0.20354182],
       [0.7393041 ],
       [0.52322409],
       ...,
       [0.95183274],
       [0.12313232],
       [0.71743542]])

b2.shape:

(5778, 1)

b2.values:

array([[0.26853656],
       [0.42872682],
       [0.34107181],
       ...,
       [0.97258741],
       [0.77647386],
       [0.7201184 ]])

batch_size

4


Check your calculations

z1.shape:

(50, 4)

z1.values:

array([[0.58475793, 0.77387674, 0.68392272, 0.62577631],
       [1.42914862, 1.42776461, 1.10799043, 1.23087398],
       [1.09752993, 0.98610201, 0.99513927, 0.76984205],
       ...,
       [1.40200846, 1.40704489, 1.47177351, 1.2876826 ],
       [0.80057356, 0.60361842, 0.37715675, 0.63943922],
       [1.08343076, 1.23078049, 1.33236934, 1.21405054]])

Implemented for you

l1.shape:

# Compute l1 as W2^T (Yhat - Y); the result has shape:
(50, 4)

l1.values:

array([[ 1.90619151e-01,  5.04911863e-01,  2.39850923e-01,
         4.70598914e-01],
       [ 1.11155469e-01,  2.16875768e-02,  5.15676504e-02,
        -3.41496985e-01],
       [-3.26332279e-01,  5.81739107e-02,  1.32373299e-02,
        -1.63465397e-01],
       ...,
       [-2.56035248e-01, -2.60606873e-01, -1.61361246e-01,
        -3.53597305e-01],
       [-1.22656968e-01, -1.29136393e-01,  3.99540453e-01,
         5.15137770e-01],
       [-1.17463600e-02,  1.55212349e-01, -8.04714320e-02,
         4.84456794e-01]])

l1.shape (after the implemented-for-you step that zeroes l1 wherever z1 < 0):

# use "l1" to compute gradients below (implemented for you)
# in this (unfortunate) test case every z1 value is > 0, so this step leaves l1 unchanged

(50, 4)

l1.values:

array([[ 1.90619151e-01,  5.04911863e-01,  2.39850923e-01,
         4.70598914e-01],
       [ 1.11155469e-01,  2.16875768e-02,  5.15676504e-02,
        -3.41496985e-01],
       [-3.26332279e-01,  5.81739107e-02,  1.32373299e-02,
        -1.63465397e-01],
       ...,
       [-2.56035248e-01, -2.60606873e-01, -1.61361246e-01,
        -3.53597305e-01],
       [-1.22656968e-01, -1.29136393e-01,  3.99540453e-01,
         5.15137770e-01],
       [-1.17463600e-02,  1.55212349e-01, -8.04714320e-02,
         4.84456794e-01]])

Implemented for you

Now your calculations:

grad_W1.shape:

# compute the gradient for W1
(50, 5778)

grad_W1.values:

array([[ 0.04440311,  0.04654767,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.01812058,  0.00457845,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.00938925,  0.0044632 ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-0.03218491, -0.02637301,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.05716739,  0.01690025,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.02524909,  0.00467131,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

grad_W2.shape:

# Compute gradient of W2
(5778, 50)

grad_W2.values:

array([[ 4.66779085e-06,  8.79935716e-06,  6.73791242e-06, ...,
         9.76451498e-06,  3.91072793e-06,  8.57170370e-06],
       [-1.46113894e-01, -3.57138779e-01, -2.74272178e-01, ...,
        -3.50344892e-01, -2.00074256e-01, -2.70721867e-01],
       [ 1.38109376e-06,  2.69993832e-06,  1.98937019e-06, ...,
         2.88259543e-06,  1.26650284e-06,  2.51687351e-06],
       ...,
       [ 8.70130445e-05,  1.66045427e-04,  1.22388322e-04, ...,
         1.80526070e-04,  7.62271359e-05,  1.59876008e-04],
       [ 2.63636651e-06,  5.13063877e-06,  3.80689133e-06, ...,
         5.54812026e-06,  2.40223393e-06,  4.86387903e-06],
       [ 8.28954510e-05,  1.55752817e-04,  1.16179863e-04, ...,
         1.70720952e-04,  6.96205653e-05,  1.51213291e-04]])

grad_b1.shape:

# compute gradient for b1
(50, 1)

grad_b1.values:

array([[ 0.35149521],
       [-0.03927157],
       [-0.10459661],
       ...,
       [-0.25790017],
       [ 0.16572122],
       [ 0.13686284]])

grad_b2.shape:

# compute gradient for b2
(5778, 1)

grad_b2.values:

array([[ 6.93302302e-06],
       [-2.49887614e-01],
       [ 2.07541375e-06],
       ...,
       [ 1.30166410e-04],
       [ 3.99597265e-06],
       [ 1.22245503e-04]])

Cheers

1 Like