Hi,
Please help me understand whether there is a problem in my code, as my results do not match the ‘Expected output’. The following is my code for lstm_backward:
def lstm_backward(da, caches):
    """Run the LSTM backward pass over every time step of a sequence.

    Walks the last axis of ``da`` from the final step back to the first,
    calling ``lstm_cell_backward`` once per step. Step ``t`` receives
    ``da[:, :, t]`` plus the ``"da_prev"`` returned by step ``t + 1``
    (zeros for the final step), the ``"dc_prev"`` returned by step
    ``t + 1`` (zeros for the final step), and ``caches[0][t]``.
    The per-step parameter gradients are summed over all steps.

    Arguments:
    da -- 3-D array of shape (n_a, m, T_x); slice [:, :, t] is the gradient
          fed to step t (presumably w.r.t. the hidden state -- confirm
          against the forward pass)
    caches -- pair (step_caches, x). step_caches[t] is handed unchanged to
              lstm_cell_backward. step_caches[0] must hold, at index 8, a
              2-D array of shape (n_x, m); only its shape is used here.
              The second element of the pair is not used.

    Returns:
    gradients -- dict with keys
        dx -- array of shape (n_x, m, T_x); slice t is the step's "dxt"
        da0 -- the "da_prev" returned for step 0
        dWf, dWi, dWc, dWo -- summed over steps, shape (n_a, n_a + n_x)
        dbf, dbi, dbc, dbo -- summed over steps, shape (n_a, 1)
    """
    step_caches, _ = caches
    # Per-step cache layout: (a1, c1, a0, c0, f1, i1, cc1, o1, x1, parameters).
    # Only x1 is needed, and only for its shape.
    x1 = step_caches[0][8]

    ### START CODE HERE ###
    n_a, _, T_x = da.shape
    # The batch size used for allocation is the one reported by the cached input.
    n_x, m = x1.shape

    gates = ("f", "i", "c", "o")

    dx = np.zeros((n_x, m, T_x))
    # Gradients flowing in from the step *after* the current one; the last
    # step has no successor, so both start at zero.
    da_prevt = np.zeros((n_a, m))
    dc_prevt = np.zeros((n_a, m))
    # One running sum per gate for the weight and bias gradients.
    dW = {gate: np.zeros((n_a, n_a + n_x)) for gate in gates}
    db = {gate: np.zeros((n_a, 1)) for gate in gates}

    for t in reversed(range(T_x)):
        step = lstm_cell_backward(da[:, :, t] + da_prevt, dc_prevt, step_caches[t])
        da_prevt = step["da_prev"]
        dc_prevt = step["dc_prev"]
        dx[:, :, t] = step["dxt"]
        for gate in gates:
            dW[gate] += step["dW" + gate]
            db[gate] += step["db" + gate]

    # After the loop da_prevt is what step 0 passed back, i.e. the gradient
    # for the initial state (still zeros if there were no time steps).
    da0 = da_prevt
    ### END CODE HERE ###

    gradients = {"dx": dx, "da0": da0}
    for gate in gates:
        gradients["dW" + gate] = dW[gate]
        gradients["db" + gate] = db[gate]
    return gradients