I am trying to understand LSTMs. My plan is to learn them step by step, and the first task is to predict the next number in the sequence 1, 2, 3, 4. Once this works, I will try known non-linear functions before using my own dataset.
I have written this simple piece of code, but it does not predict what I want, so I have a few questions:
- When defining the neural network you define it like this:
class LSTM(torch.nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = torch.nn.LSTM(10, 20, 1)
        self.fc = torch.nn.Linear(20, 2)
        self.flatten = torch.nn.Flatten(start_dim=1, end_dim=2)  # Adjust start_dim and end_dim

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x)
        x = self.flatten(x)
        return x
10 is the input size, i.e. the dimension of my features,
20 is the hidden state size, which is a hyperparameter that can be tuned,
and then I want to flatten the output down to a single value, because I am mapping a sequence like 1, 2, 3, 4, 5 to 6.
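For reference, this is the small shape check I did on these layers in isolation, to convince myself of what each step produces (the dummy input and its (seq_len, batch, features) layout are just my assumption of what nn.LSTM expects by default):

import torch

lstm = torch.nn.LSTM(10, 20, 1)   # input_size=10, hidden_size=20, num_layers=1
fc = torch.nn.Linear(20, 2)
flatten = torch.nn.Flatten(start_dim=1, end_dim=2)

dummy = torch.randn(5, 1, 10)     # 5 time steps, batch of 1, 10 features per step
out, _ = lstm(dummy)
print(out.shape)                  # torch.Size([5, 1, 20]) - one hidden vector per time step
out = fc(out)
print(out.shape)                  # torch.Size([5, 1, 2])
out = flatten(out)
print(out.shape)                  # torch.Size([5, 2])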
However, this does not work.
Firstly, I get this warning:
UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([1, 20])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
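If I read the warning correctly, it comes from the loss call: MSELoss is comparing the (1, 20) output of my model against a single target value and broadcasting one against the other. A minimal reproduction of the same warning outside my model (the shapes are simply taken from the message above):

import torch

criterion = torch.nn.MSELoss()
output = torch.zeros(1, 20)        # shape of my model's output, according to the warning
target = torch.zeros(1)            # shape of my target
loss = criterion(output, target)   # raises the same UserWarning about broadcasting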
Furthermore, when I test it I get:
Input: [ 1 2 3 4 5 6 7 8 9 10]
Output: tensor([[10.2278, 10.2459, 10.8441, 10.8658, 11.4653, 11.4891, 12.0948, 12.1193,
12.7373, 12.7615, 13.3987, 13.4216, 14.0856, 14.1065, 14.8050, 14.8237,
15.5644, 15.5806, 16.3699, 16.3839]])
Actual: 11
What I wanted was for the input to be the whole sequence and the output of the model to be one single y value.
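To make the goal concrete, this is roughly the behaviour I am after; wanted_model is a hypothetical stand-in (it just returns "last value + 1"), and the (batch, seq_len, features) layout is only my guess at how the window should be fed in:

import torch

window = torch.arange(1.0, 11.0).view(1, 10, 1)   # the window 1..10 as (batch=1, seq_len=10, features=1)

def wanted_model(x):
    # hypothetical stand-in, not my LSTM: predicts "last value + 1" to show the output shape I want
    return x[:, -1, :] + 1.0

print(wanted_model(window))                        # tensor([[11.]]) - one single y value

My full code is pasted below, and it would be nice to see what mistake I made, since this is a simple problem: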
import torch
import numpy as np

data = np.arange(0, 100, 1)

def shift_data(window_size, data):
    # sliding window: x is window_size consecutive values, y is the value that follows
    x = []
    y = []
    for i in range(len(data) - window_size):
        x.append(data[i:i+window_size])
        y.append(data[i+window_size])
    return np.array(x), np.array(y)

x, y = shift_data(10, data)

class dataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

dataset = dataset(x, y)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_data, test_data = torch.utils.data.random_split(dataset, [train_size, test_size])

class LSTM(torch.nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = torch.nn.LSTM(10, 20, 1)
        self.fc = torch.nn.Linear(20, 2)
        self.flatten = torch.nn.Flatten(start_dim=1, end_dim=2)  # Adjust start_dim and end_dim

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x)
        x = self.flatten(x)
        return x

model = LSTM()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(100):
    for i, (x, y) in enumerate(train_data):
        optimizer.zero_grad()
        x_tensor = torch.tensor(x).float().unsqueeze(0).unsqueeze(2)
        y_tensor = torch.tensor(y).float().unsqueeze(0)
        output = model(x_tensor)
        loss = criterion(output, y_tensor)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch, 'Loss:', loss.item())

# evaluate on the held-out windows
model.eval()
with torch.no_grad():
    for x, y in test_data:
        x_tensor = torch.tensor(x).float().unsqueeze(0).unsqueeze(2)
        output = model(x_tensor)
        print('Input:', x, ' Output:', output, ' Actual:', y)