I am getting wrong values from the function lstm_backward, even though the shapes of its results are correct and all the other related functions pass their checks.
Below are my code and results. I've been stuck on this for 2 days, so please help me.
def lstm_backward(da, caches):
    """Run the backward pass over every time step and sum the gradients.

    Arguments:
    da -- 3-D array unpacked as (n_a, m, T_x); the slice da[:, :, t] is
          combined with the gradient carried back from step t + 1.
    caches -- pair (per_step_caches, x). per_step_caches[t] is handed
              unchanged to lstm_cell_backward; per_step_caches[0] must be a
              10-item tuple whose 9th item is a 2-D array of shape (n_x, m).

    Returns:
    gradients -- dict with keys "dx", "da0", "dWf", "dbf", "dWi", "dbi",
                 "dWc", "dbc", "dWo", "dbo". "dx" has shape (n_x, m, T_x),
                 the dW* entries (n_a, n_a + n_x), the db* entries (n_a, 1).
    """
    step_caches, _x = caches
    # Only the 9th cached item of the first step is needed, for its shape.
    (_a_next, _c_next, _a_prev, _c_prev,
     _ft, _it, _cct, _ot, x_first, _params) = step_caches[0]

    n_a, m, T_x = da.shape
    n_x, m = x_first.shape  # m is re-read from the cached input on purpose

    dx = np.zeros((n_x, m, T_x))

    # Running totals for the parameter gradients, keyed exactly like the
    # entries of the dict returned by lstm_cell_backward.
    totals = {}
    for weight_key in ("dWf", "dWi", "dWc", "dWo"):
        totals[weight_key] = np.zeros((n_a, n_a + n_x))
    for bias_key in ("dbf", "dbi", "dbc", "dbo"):
        totals[bias_key] = np.zeros((n_a, 1))

    # Gradients flowing from step t + 1 back into step t; zero past the end.
    da_carry = np.zeros((n_a, m))
    dc_carry = np.zeros((n_a, m))

    # NOTE(review): each step receives da[:, :, t] + da_carry. If the output
    # disagrees with a reference, confirm the reference uses the same
    # convention and verify lstm_cell_backward itself.
    for t in range(T_x - 1, -1, -1):
        step = lstm_cell_backward(da[:, :, t] + da_carry, dc_carry,
                                  step_caches[t])
        da_carry, dc_carry = step['da_prev'], step['dc_prev']
        dx[:, :, t] = step['dxt']
        for key in totals:
            totals[key] += step[key]

    # Whatever was carried out of the earliest processed step is the gradient
    # with respect to the initial hidden state.
    da0 = da_carry

    gradients = {
        "dx": dx, "da0": da0,
        "dWf": totals["dWf"], "dbf": totals["dbf"],
        "dWi": totals["dWi"], "dbi": totals["dbi"],
        "dWc": totals["dWc"], "dbc": totals["dbc"],
        "dWo": totals["dWo"], "dbo": totals["dbo"],
    }
    return gradients
Results:
gradients["dx"][1][2] = [ 0.01034214 1.03473735 -0.2398793 -0.43281115]
gradients["dx"].shape = (3, 10, 4)
gradients["da0"][2][3] = 0.5883931290038376
gradients["da0"].shape = (5, 10)
gradients["dWf"][3][1] = -0.02269017674887574
gradients["dWf"].shape = (5, 8)
gradients["dWi"][1][2] = 0.6099853844261891
gradients["dWi"].shape = (5, 8)
gradients["dWc"][3][1] = -0.013857139274558946
gradients["dWc"].shape = (5, 8)
gradients["dWo"][1][2] = 0.04772920545685257
gradients["dWo"].shape = (5, 8)
gradients["dbf"][4] = [-0.199665]
gradients["dbf"].shape = (5, 1)
gradients["dbi"][4] = [-0.7340795]
gradients["dbi"].shape = (5, 1)
gradients["dbc"][4] = [-0.56981661]
gradients["dbc"].shape = (5, 1)
gradients["dbo"][4] = [-0.24499124]
gradients["dbo"].shape = (5, 1)