RNN Examples

An LSTM-based forecast of Boston monthly average temperatures from 12 months of lagged values, built in TensorFlow/Keras.

Notebook Contents

This notebook covers:

  • Preparing lagged time-series data and reshaping it to [samples, timesteps, features]
  • Building and training an LSTM forecaster in Keras
  • Diagnosing overfitting with loss curves, early stopping, and residual analysis

Use the buttons above to download the notebook or open it in your preferred environment.

πŸ““ Notebook Preview

L3d: Recurrent Neural Networks

InΒ [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Attention
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
InΒ [3]:
def reset_weights(model):
    """Re-initialize all model weights from their original initializers."""
    for layer in model.layers:
        # Recurse into nested sub-models
        if isinstance(layer, tf.keras.Model):
            reset_weights(layer)
            continue
        # Re-draw every variable that has a matching *_initializer attribute
        for k, initializer in layer.__dict__.items():
            if "initializer" not in k:
                continue
            # find the corresponding variable and re-initialize it in place
            var = getattr(layer, k.replace("_initializer", ""))
            var.assign(initializer(var.shape, var.dtype))

In this example, we use an LSTM to predict monthly average temperatures in Boston from the 12-month lagged dataset.

InΒ [4]:
# Read the dataset into a pandas DataFrame
df = pd.read_csv('data/boston_monthly_avg_temps_1978_2019.csv', header=0, parse_dates=[0], index_col=[0])
# resample at a month level
#df_resampled = df.resample('M').mean()
InΒ [5]:
df.head()
Out[5]:
TAVG TAVG_LAG_1 TAVG_LAG_2 TAVG_LAG_3 TAVG_LAG_4 TAVG_LAG_5 TAVG_LAG_6 TAVG_LAG_7 TAVG_LAG_8 TAVG_LAG_9 TAVG_LAG_10 TAVG_LAG_11 TAVG_LAG_12
DATE
1980-01-31 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613 23.071429 32.451613
1980-02-29 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613 23.071429
1980-03-31 36.822581 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613
1980-04-30 48.716667 36.822581 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333
1980-05-31 59.387097 48.716667 36.822581 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032
InΒ [6]:
split_index = round(len(df) * 0.8)
split_date = df.index[split_index]
# Chronological 80/20 split -- time series must not be shuffled
df_train = df.loc[df.index <= split_date].copy()
df_test = df.loc[df.index > split_date].copy()
InΒ [7]:
X_train = df_train.iloc[:, 1:13]   # columns 1-12: the lag features
y_train = df_train.iloc[:, 0]      # column 0: current month's average temperature
X_test = df_test.iloc[:, 1:13]
y_test = df_test.iloc[:, 0]
InΒ [8]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
(383, 12)
(383,)
(95, 12)
InΒ [28]:
X_train.head()
Out[28]:
TAVG_LAG_1 TAVG_LAG_2 TAVG_LAG_3 TAVG_LAG_4 TAVG_LAG_5 TAVG_LAG_6 TAVG_LAG_7 TAVG_LAG_8 TAVG_LAG_9 TAVG_LAG_10 TAVG_LAG_11 TAVG_LAG_12
DATE
1980-01-31 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613 23.071429 32.451613
1980-02-29 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613 23.071429
1980-03-31 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333 42.451613
1980-04-30 36.822581 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032 48.683333
1980-05-31 48.716667 36.822581 27.810345 29.403226 36.629032 48.566667 52.629032 64.883333 71.661290 74.483871 68.166667 61.129032
InΒ [32]:
np.asarray(X_train)
Out[32]:
array([[36.62903226, 48.56666667, 52.62903226, ..., 42.4516129 ,
        23.07142857, 32.4516129 ],
       [29.40322581, 36.62903226, 48.56666667, ..., 48.68333333,
        42.4516129 , 23.07142857],
       [27.81034483, 29.40322581, 36.62903226, ..., 61.12903226,
        48.68333333, 42.4516129 ],
       ...,
       [73.90322581, 77.30645161, 67.08333333, ..., 44.78333333,
        55.59677419, 68.65      ],
       [67.28333333, 73.90322581, 77.30645161, ..., 32.67741935,
        44.78333333, 55.59677419],
       [57.53225806, 67.28333333, 73.90322581, ..., 27.56451613,
        32.67741935, 44.78333333]])

Now we reshape the inputs to [samples, timesteps, features] (and convert the targets to plain arrays). Think of "timesteps" as the number of lag variables.

InΒ [9]:
X_train = np.reshape(np.asarray(X_train), (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(np.asarray(X_test), (X_test.shape[0], X_test.shape[1], 1))
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
InΒ [36]:
print(X_train.shape)
(383, 12, 1)
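
To make the reshape concrete, here is a toy sketch (hypothetical values, not from the dataset): a 2-sample, 3-lag matrix becomes a 3-D tensor in which each lag is one timestep carrying a single feature.

toy = np.array([[1., 2., 3.],
                [4., 5., 6.]])                       # shape (2, 3): 2 samples, 3 lags
toy3d = toy.reshape(toy.shape[0], toy.shape[1], 1)   # shape (2, 3, 1)
print(toy3d.shape)                                   # the LSTM reads 3 timesteps of 1 feature each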

LSTM implementation

InΒ [11]:
reset_weights(model)  # re-draw weights when re-running with a model left over from a previous run (skip on a fresh kernel)
InΒ [12]:
K.clear_session()
model = Sequential()
model.add(LSTM(units = 8, input_shape = (X_train.shape[1], X_train.shape[2])))
# model.add(Attention())  # NB: keras Attention expects a [query, value] pair, so it cannot be stacked into a Sequential model like this
# model.add(Flatten())    # Flatten or GlobalAveragePooling would reduce a sequence output back to 1D
model.add(Dense(1, activation = "linear")) # transform hidden unit activations to single output (monthly average temperature)
model.compile(loss = 'mse', optimizer = 'adam')
InΒ [43]:
#model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0);
InΒ [13]:
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┑━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
β”‚ lstm (LSTM)                     β”‚ (None, 8)              β”‚           320 β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ dense (Dense)                   β”‚ (None, 1)              β”‚             9 β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
 Total params: 329 (1.29 KB)
 Trainable params: 329 (1.29 KB)
 Non-trainable params: 0 (0.00 B)
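Sanity-checking the parameter count: an LSTM with n = 8 units reading d = 1 feature per timestep has four gates, each with an input kernel (d×n), a recurrent kernel (n×n), and a bias (n), so 4·(n·(n+d) + n) = 4·(8·9 + 8) = 320 parameters; the Dense head adds 8·1 + 1 = 9, for 329 in total.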
InΒ [63]:
early_stopping = EarlyStopping(monitor='val_loss',  # track generalization, not training loss
                               patience=7, min_delta=0.001,
                               restore_best_weights=True, mode="min")
InΒ [86]:
# Train for the full 1000 epochs (no callbacks) to make the overfitting visible;
# the early-stopping variant is sketched further below
hist = model.fit(X_train, y_train, batch_size=16,
                 validation_data=(X_test, y_test),
                 verbose=0, epochs=1000)
InΒ [122]:
def plot_loss(history):
  plt.plot(history.history['loss'], label='Train MSE')
  plt.plot(history.history['val_loss'], label='Test MSE')
  #plt.ylim([0, 40]) # adjust to zoom in/out accordingly
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.legend()
  plt.grid(True)
InΒ [123]:
plot_loss(hist)
[Figure: training and test MSE by epoch]
InΒ [91]:
plot_loss(hist) # with ylimit at 40
[Figure: the same loss curves with the y-axis limited to 40]

We see that the optimism (the gap between training and test loss) begins to increase significantly after about 600 epochs. The early-stopping callback defined earlier was not passed to fit above; wired in as sketched below, it would halt training near the validation optimum and restore the best weights.
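
A sketch of the same fit with the callback actually applied (not executed in this notebook): with restore_best_weights=True, training would stop roughly patience epochs past the best validation loss and keep the weights from that point.

hist_es = model.fit(X_train, y_train, batch_size=16,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stopping],      # stop on rising validation loss
                    verbose=0, epochs=1000)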

InΒ [102]:
y_pred = model.predict(X_test)
InΒ [114]:
y_test_pred = pd.DataFrame(df_test.iloc[:, 0])   # observed temperatures, indexed by date
y_test_pred.columns = ['Observed']
y_test_pred['Predicted'] = y_pred                # model predictions alongside the observations
InΒ [116]:
y_test_pred.plot(figsize=(15,5))
Out[116]:
<AxesSubplot:xlabel='DATE'>
[Figure: observed vs. predicted monthly average temperatures over the test period]
InΒ [120]:
error = y_pred.flatten() - y_test   # prediction errors (predicted minus observed)
plt.hist(error, bins=25)
plt.xlabel('Residuals')
plt.ylabel('Count')
Out[120]:
Text(0, 0.5, 'Count')
[Figure: histogram of the prediction errors]
InΒ [121]:
np.mean(error)
Out[121]:
-0.9041021991419528

The mean error is about -0.9: on average the model predicts slightly below the observed temperature, a small negative bias.
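
The mean error only measures bias, not overall accuracy; a quick sketch (not part of the original run) for the typical error magnitude:

rmse = np.sqrt(np.mean(error ** 2))   # root-mean-squared error on the test set
print(rmse)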