Understanding LSTM in pytorch

LSTM in PyTorch

LSTM function in pytorch
Detailed explanation of LSTM parameters in pytorch

The following are the parameters

input_size: dimension of each input feature vector
hidden_size: number of features (nodes) in the hidden state
num_layers: number of stacked LSTM layers

The shape requirement for input x is x: [seq_length, batch_size, input_size]. To understand these three parameters, you can refer to the article that uses “animations” and “examples” to explain RNNs.
That article introduces the RNN time step (time_step): each t is called one step, and t1 – t5 is one cycle. An RNN introduces the concept of memory: the result generated by the previous time step, $Y_{t-1}$, is fed in together with the current input $X_t$.
Understand that time_step is a parameter of the neural network, which will not change once the network is built, and batch is a training parameter, which can be adjusted at any time according to the effect during training.

Build and train an LSTM model

LSTM code implementation

Use the sine and cosine functions to construct a time series, and the sine and cosine functions have a derivative relationship. The value of the cosine function is predicted by inputting the value of the sine function.
Take the value of the sine function as the input of the LSTM and predict the value of the cosine function at the corresponding time. The network has 1 input neuron, 1 output neuron, and 16 hidden neurons.

Complete code:

import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt

Define model

class LSTMRNN(nn.Module):
    """LSTM followed by a linear layer that is applied at every time step.

    The LSTM is created with PyTorch's default layout, so the model consumes
    tensors shaped (seq_len, batch, input_size) and produces tensors shaped
    (seq_len, batch, output_size).
    """

    def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=1):
        super().__init__()
        # Recurrent part: maps input_size features to hidden_size features
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        # Read-out part: maps each hidden vector to output_size values
        self.forwardCalculation = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        """Produce one prediction per time step and batch element.

        :param _x: input, (seq_len, batch, input_size)
        :return: tensor of shape (seq_len, batch, output_size)
        """
        hidden_seq, _state = self.lstm(_x)
        seq_len, batch, hidden = hidden_seq.shape
        # Merge the time and batch axes so the linear layer sees a 2-D matrix
        flat = hidden_seq.view(seq_len * batch, hidden)
        projected = self.forwardCalculation(flat)
        # Split the merged axis back into (seq_len, batch)
        return projected.view(seq_len, batch, -1)

Define data

data_len = 200
t = np.linspace(0, 12 * np.pi, data_len)
sin_t, cos_t = np.sin(t), np.cos(t)

# Stack the two series as the columns of a (data_len, 2) float32 array:
# column 0 holds sin(t), column 1 holds cos(t)
dataset = np.column_stack((sin_t, cos_t)).astype('float32')

# Preview the first 60 samples of both series
preview = slice(0, 60)
plt.figure()
for column, series_label in enumerate(('sin(t)', 'cos(t)')):
    plt.plot(t[preview], dataset[preview, column], label=series_label)
# Dashed vertical guide lines at two fixed values of t
plt.plot([2.5, 2.5], [-1.3, 0.55], 'r--', label='t=2.5')
plt.plot([6.8, 6.8], [-1.3, 0.85], 'm--', label='t=6.8')
plt.xlabel('t')  # time axis
plt.ylabel('sin(t) and cos(t)')
plt.ylim(-1.2, 1.2)
plt.legend(loc='upper right')
plt.show()

as the picture shows:

Partition the data set

# Split the dataset chronologically: the first half (ratio 0.5) is the
# training set, the remaining half is the test set
INPUT_FEATURES_NUM = 1
OUTPUT_FEATURES_NUM = 1
train_data_ratio = 0.5
train_data_len = int(data_len * train_data_ratio)

# Column 0 (sin) is the model input, column 1 (cos) is the target
train_part, test_part = dataset[:train_data_len], dataset[train_data_len:]
train_x, train_y = train_part[:, 0], train_part[:, 1]
test_x, test_y = test_part[:, 0], test_part[:, 1]

# Time axes that match the two partitions (used for plotting)
t_for_training, t_for_test = t[:train_data_len], t[train_data_len:]

train

# Perform training
# Reshape the 1-D series into the 3-D layout the model expects. LSTMRNN uses
# nn.LSTM with its default (seq_len, batch, input_size) layout, so the middle
# axis of size 5 is the batch axis and the leading axis is the sequence axis.
# The training length must be divisible by 5 or reshape raises ValueError.
train_x_tensor = torch.from_numpy(train_x.reshape(-1, 5, INPUT_FEATURES_NUM))
train_y_tensor = torch.from_numpy(train_y.reshape(-1, 5, OUTPUT_FEATURES_NUM))
print(train_x_tensor)
print(train_x_tensor.shape)

lstm_model = LSTMRNN(INPUT_FEATURES_NUM, 16, output_size=OUTPUT_FEATURES_NUM, num_layers=1)
print('LSTM model:', lstm_model)
# Actually enumerate the parameters (name and shape); printing the bound
# method `lstm_model.parameters` would only repeat the model description.
print('model.parameters:',
      [(name, tuple(param.shape)) for name, param in lstm_model.named_parameters()])

loss_fun = nn.MSELoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-2)
max_epochs = 10000
for epoch in range(max_epochs):
    # Clear gradients left over from the previous iteration before the new
    # forward/backward pass
    optimizer.zero_grad()
    output = lstm_model(train_x_tensor)  # the entire training tensor is one batch
    loss = loss_fun(output, train_y_tensor)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    if loss_value < 1e-4:
        # Early stop once the target loss is reached
        print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss_value))
        print("The loss value is reached")
        break
    elif (epoch + 1) % 100 == 0:
        # Periodic progress report every 100 epochs
        print('Epoch: [{}/{}], Loss:{:.5f}'.format(epoch + 1, max_epochs, loss_value))

Training process:

View prediction results

# Predict on the training inputs. Inference needs no gradients, so the
# forward pass runs under no_grad instead of stripping the graph afterwards
# with the legacy `.data` attribute.
with torch.no_grad():
    predictive_y_for_training = lstm_model(train_x_tensor)
print(predictive_y_for_training)
# Flatten to a 2-D (N, OUTPUT_FEATURES_NUM) array and convert to NumPy for plotting
predictive_y_for_training = predictive_y_for_training.view(-1, OUTPUT_FEATURES_NUM).numpy()
print(predictive_y_for_training)

test set

# Evaluate on the held-out test set
lstm_model = lstm_model.eval()  # switch the model to evaluation mode
# Same (seq_len, batch=5, input_size) layout that was used for training
test_x_tensor = torch.from_numpy(test_x.reshape(-1, 5, INPUT_FEATURES_NUM))
# Inference only: skip building the autograd graph instead of using `.data`
with torch.no_grad():
    predictive_y_for_testing = lstm_model(test_x_tensor)
# Flatten to a 2-D (N, OUTPUT_FEATURES_NUM) array and convert to NumPy for plotting
predictive_y_for_testing = predictive_y_for_testing.view(-1, OUTPUT_FEATURES_NUM).numpy()
print(predictive_y_for_testing)

draw

plt.figure()
# (time axis, values, line format, legend label) for every curve, in draw order
curves = (
    (t_for_training, train_x, 'g', 'sin_trn'),
    (t_for_training, train_y, 'b', 'ref_cos_trn'),
    (t_for_training, predictive_y_for_training, 'y--', 'pre_cos_trn'),
    (t_for_test, test_x, 'c', 'sin_tst'),
    (t_for_test, test_y, 'k', 'ref_cos_tst'),
    (t_for_test, predictive_y_for_testing, 'm--', 'pre_cos_tst'),
)
for time_axis, values, line_format, curve_label in curves:
    plt.plot(time_axis, values, line_format, label=curve_label)

# Vertical dashed line at the first test sample, separating train from test
split_t = t[train_data_len]
plt.plot([split_t, split_t], [-1.2, 4.0], 'r--', label='separation line')

plt.xlabel('t')
plt.ylabel('sin(t) and cos(t)')
plt.xlim(t[0], t[-1])
plt.ylim(-1.2, 4)
plt.legend(loc='upper right')
# Captions on either side of the separation line
for x_position, caption in ((14, "train"), (20, "test")):
    plt.text(x_position, 2, caption, size=15, alpha=1.0)

plt.show()

as the picture shows:

Time series git:
https://github.com/microprediction/timeseries-notebooks/tree/main
https://github.com/cerlymarco/MEDIUM_NoteBook