Adjusting the hyperparameters of an LSTM model for time series prediction

I have 30 years (1981 to 2010) of daily data: precipitation, minimum temperature, maximum temperature, and streamflow at 3 different stations. I want to predict the streamflow of the downstream station (Niamey).

I am not getting good results with my model.

[image: model results]

Here is the code for my simulation.

"""6_lstm_model.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1-oxxcH2ybR1dhsx5bi4YrnOdJ5reVVMT

## Imports
"""

# Commented out IPython magic to ensure Python compatibility.
#import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import tensorflow as tf
# %matplotlib inline

#print versions of modules
print("Numpy: " + np.__version__)
print("Pandas: " + pd.__version__)
print("Tensorflow: " + tf.__version__)
print("Science Kit Learn: " + sklearn.__version__)

#import additional modules
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau

"""## Data"""

#load data on local system
#df = pd.read_csv('./model_data/model_data_interpolated.csv')
#df.head(2)

#load data from google drive
from google.colab import drive
drive.mount("/content/gdrive")

#load data
df = pd.read_csv("/content/gdrive/My Drive/colab_notebooks/memoire_master/model_data_interpolated.csv")
df.head(2)

#pop the Date column from the features and convert it to datetime
date_time = pd.to_datetime(df.pop('Date'), format='%Y-%m-%d') #or infer_datetime_format=True

#view dataset
df.set_index(date_time)[['pr', 'tmax', 'tmin', 'Niamey', 'Ansongo', 'Kandadji']].plot(subplots=True)

#inspection
df.describe().transpose()

"""## Data split"""

#set target data
shift_steps = 1
target_names = 'Niamey'
df_targets = df[target_names].shift(-shift_steps)

#set input data
x_data = df.values[0:-shift_steps]
num_x_signals = x_data.shape[1]
num_x_signals

#set output data
y_data = df_targets.values[:-shift_steps]
num_y_signals = y_data.reshape(-1,1).shape[1]
num_y_signals
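
#sanity check (for reference): with this slicing, y_data[i] is the Niamey streamflow
#shift_steps day(s) after the inputs in x_data[i]
i = 100
assert np.isclose(y_data[i], df['Niamey'].values[i + shift_steps])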

#set ratio
train_split = 0.8

#split training from testing data
num_data = len(x_data)
num_train = int(train_split * num_data)
num_test = num_data - num_train

x_train = x_data[0:num_train]
x_test = x_data[num_train:]

y_train = y_data[0:num_train]
y_test = y_data[num_train:]

"""## Normalization """

#normalize input and output data
x_scaler = MinMaxScaler()
x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1,1))
y_test_scaled = y_scaler.transform(y_test.reshape(-1,1))
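
#for reference, a minimal sketch of mapping scaled values back to streamflow units with
#the fitted y_scaler (here applied to a few test targets as a stand-in for predictions)
example_original = y_scaler.inverse_transform(y_test_scaled[:3])
print(example_original)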

"""## Data Generator"""

batch_size = 32
sequence_length = 365

#set batch generators
train_generator = TimeseriesGenerator(x_train_scaled, y_train_scaled, 
                                      length=sequence_length, sampling_rate=1, batch_size=batch_size)
test_generator = TimeseriesGenerator(x_test_scaled, y_test_scaled,
                                     length=sequence_length, sampling_rate=1, batch_size=batch_size)
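
#quick shape check (for reference): TimeseriesGenerator yields one target value per
#window, not a target sequence
x_batch, y_batch = train_generator[0]
print("x batch:", x_batch.shape)   #(batch_size, sequence_length, num_x_signals)
print("y batch:", y_batch.shape)   #(batch_size, 1)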

"""## LSTM Setting Up"""

#set LSTM neural network
model = Sequential()

model.add(LSTM(units=25, return_sequences=True, input_shape=(None, num_x_signals)))
model.add(Dense(1))

#set compiler
model.compile(loss=tf.losses.MeanSquaredError(), optimizer=tf.optimizers.Adam())

model.summary()
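
#quick output-shape check (for reference): with return_sequences=True the network
#predicts one value per timestep of each input window
x_batch, _ = train_generator[0]
print("model output:", model.predict(x_batch).shape)   #(batch_size, sequence_length, 1)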

"""## Callback Functions"""

#write checkpoints during training
path_checkpoint = '1_checkpoint.keras'
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint, 
                                      monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True)

#stop optimization when performance worsens on the validation set
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

#write TensorBoard log during training
callback_tensorboard = TensorBoard(log_dir='./1_logs/', histogram_freq=0, write_graph=False)

#reduce learning rate for optimizer if validation loss has not improved since last epoch
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_lr=1e-4, patience=0, verbose=1)

callbacks = [callback_early_stopping, callback_checkpoint, callback_tensorboard, callback_reduce_lr]

"""## LSTM Training"""

validation_data = (np.expand_dims(x_test_scaled, axis=0),
                   np.expand_dims(y_test_scaled, axis=0))

# Commented out IPython magic to ensure Python compatibility.
# %%time
history = model.fit(x=train_generator, epochs=50,
                    validation_data=validation_data, callbacks=callbacks)

#Load Checkpoint
#try:
#    model.load_weights(path_checkpoint)
#except Exception as error:
#    print("Error trying to load checkpoint.")
#    print(error)

"""## Testing"""

#performance on test set
result = model.evaluate(x=np.expand_dims(x_test_scaled, axis=0),
                        y=np.expand_dims(y_test_scaled, axis=0))

print("loss (test-set):", result)

#plot
plt.figure(figsize=(8,6))
plt.plot(history.history['loss'], 'o-', mfc='none', markersize=10, label='Train')
plt.plot(history.history['val_loss'], 'o-', mfc='none', markersize=10, label='Valid')
plt.title('LSTM Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
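
#for reference, one possible way to compare predictions with observations over the test
#window, back-transformed to streamflow units with the fitted y_scaler
y_pred_scaled = model.predict(np.expand_dims(x_test_scaled, axis=0))
y_pred = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1))
y_obs = y_scaler.inverse_transform(y_test_scaled)

plt.figure(figsize=(12, 4))
plt.plot(y_obs, label='Observed (Niamey)')
plt.plot(y_pred, label='Predicted (Niamey)')
plt.title('Test period: observed vs predicted streamflow')
plt.xlabel('Day of test period')
plt.ylabel('Streamflow')
plt.legend()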