I have 30 years (1981 to 2010) of daily precipitation, minimum temperature, maximum temperature, and streamflow data at three different stations. I want to predict the streamflow of the downstream station (Niamey).
I am not getting good results with my model.
Here is the code of my simulation.
"""6_lstm_model.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1-oxxcH2ybR1dhsx5bi4YrnOdJ5reVVMT
## Imports
"""
# Commented out IPython magic to ensure Python compatibility.
#import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import tensorflow as tf
# %matplotlib inline
#print versions of modules
print("Numpy: " + np.__version__)
print("Pandas: " + pd.__version__)
print("Tensorflow: " + tf.__version__)
print("Science Kit Learn: " + sklearn.__version__)
#import additional modules
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
"""## Data"""
#load data on local system
#df = pd.read_csv('./model_data/model_data_interpolated.csv')
#df.head(2)
#load data from google drive
from google.colab import drive
drive.mount("/content/gdrive")
#load data
df = pd.read_csv("/content/gdrive/My Drive/colab_notebooks/memoire_master/model_data_interpolated.csv")
df.head(2)
#drop time column
date_time = pd.to_datetime(df.pop('Date'), format='%Y-%m-%d') #or infer_datetime_format=True
#view dataset
df.set_index(date_time)[['pr', 'tmax', 'tmin', 'Niamey', 'Ansongo', 'Kandadji']].plot(subplots=True)
#inspection
df.describe().transpose()
"""## Data split"""
#set target data
shift_steps = 1
target_name = 'Niamey'
df_targets = df[target_name].shift(-shift_steps)
#set input data
x_data = df.values[0:-shift_steps]
num_x_signals = x_data.shape[1]
num_x_signals
#set output data (drop the trailing NaN rows created by the shift)
y_data = df_targets.values[:-shift_steps]
num_y_signals = y_data.reshape(-1,1).shape[1]
num_y_signals
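#to make the supervised framing explicit: shift(-1) pairs each day's inputs
#with the next day's flow; a minimal sketch with hypothetical values
#(not from the dataset), just to illustrate the shift
demo = pd.Series([10.0, 12.0, 15.0, 11.0], name='Niamey')
print(demo.shift(-1))  #-> 12.0, 15.0, 11.0, NaN; the trailing NaN is what [:-shift_steps] drops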
#set ratio
train_split = 0.8
#split training from testing data
num_data = len(x_data)
num_train = int(train_split * num_data)
num_test = num_data - num_train
x_train = x_data[0:num_train]
x_test = x_data[num_train:]
y_train = y_data[0:num_train]
y_test = y_data[num_train:]
"""## Normalization """
#normalize input and output data (fit the scalers on the training period only, to avoid leakage from the test set)
x_scaler = MinMaxScaler()
x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1,1))
y_test_scaled = y_scaler.transform(y_test.reshape(-1,1))
"""## Data Generator"""
batch_size = 32
sequence_length = 365
#set batch generators
train_generator = TimeseriesGenerator(x_train_scaled, y_train_scaled,
length=sequence_length, sampling_rate=1, batch_size=batch_size)
test_generator = TimeseriesGenerator(x_test_scaled, y_test_scaled,
length=sequence_length, sampling_rate=1, batch_size=batch_size)
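#sanity check (a minimal sketch using the generators defined above): each
#window maps sequence_length days of inputs to a single next-day target,
#so the network must output exactly one value per window
x_batch, y_batch = train_generator[0]
print("x batch shape:", x_batch.shape)  #(batch_size, sequence_length, num_x_signals)
print("y batch shape:", y_batch.shape)  #(batch_size, 1)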
"""## LSTM Setting Up"""
#set LSTM neural network
#the generator yields one target per window, so the LSTM must return only its
#final hidden state (return_sequences=False), not one output per time step
model = Sequential()
model.add(LSTM(units=25, return_sequences=False, input_shape=(None, num_x_signals)))
model.add(Dense(1))
#set compiler
model.compile(loss=tf.losses.MeanSquaredError(), optimizer=tf.optimizers.Adam())
model.summary()
"""## Callback Functions"""
#write checkpoints during training
path_checkpoint = '1_checkpoint.weights.h5'  #recent Keras versions require the .weights.h5 suffix when save_weights_only=True
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True)
#stop optimization when performance worsens on the validation set
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
#write TensorBoard log during training
callback_tensorboard = TensorBoard(log_dir='./1_logs/', histogram_freq=0, write_graph=False)
#reduce learning rate for optimizer if validation loss has not improved since last epoch
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_lr=1e-4, patience=0, verbose=1)
callbacks = [callback_early_stopping, callback_checkpoint, callback_tensorboard, callback_reduce_lr]
"""## LSTM Training"""
# Commented out IPython magic to ensure Python compatibility.
# %%time
#validate on the same kind of windowed data the model is trained on
history = model.fit(train_generator, epochs=50,
                    validation_data=test_generator, callbacks=callbacks)
#load the best checkpoint before evaluating
try:
    model.load_weights(path_checkpoint)
except Exception as error:
    print("Error trying to load checkpoint.")
    print(error)
"""## Testing"""
#performance on test set
result = model.evaluate(x=np.expand_dims(x_test_scaled, axis=0),
y=np.expand_dims(y_test_scaled, axis=0))
print("loss (test-set):", result)
#plot
plt.figure(figsize=(8,6))
plt.plot(history.history['loss'], 'o-', mfc='none', markersize=10, label='Train')
plt.plot(history.history['val_loss'], 'o-', mfc='none', markersize=10, label='Valid')
plt.title('LSTM Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
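#to judge the simulated hydrograph itself rather than the scaled loss, a
#minimal sketch (assuming the trained model and y_scaler above): predict over
#the test windows and invert the MinMax scaling back to streamflow units
y_pred = y_scaler.inverse_transform(model.predict(test_generator))
#the first sequence_length days of the test set have no complete input window
y_obs = y_test[sequence_length:].reshape(-1, 1)
plt.figure(figsize=(12,4))
plt.plot(y_obs, label='Observed (Niamey)')
plt.plot(y_pred, label='Predicted')
plt.title('Test Set: Observed vs Predicted Streamflow')
plt.xlabel('Day of test period')
plt.ylabel('Streamflow')
plt.legend()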