Hello,
I’m new to PyTorch and am trying to run an LSTM model on a classification problem. I have two classes: 0 for no crash occurred, and 1 for crash occurred. The matrix has 1000 hours (rows) and 1371 nodes (columns), which correspond to intersections of a road network.
I’ve tried changing the code repeatedly to make it work, but I keep getting these weird predictions no matter what. Could anyone help me identify what I’m doing wrong, or point me to a resource that would help me understand the issue, please?
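For context, here is a made-up stand-in with the same shape and dtype as my data (the real matrix comes from matrix_hourly.pkl; the crash probability here is just for illustration):

import numpy as np
import pandas as pd

# Made-up data: 1000 hourly rows x 1371 intersection columns; each cell is
# 1 if a crash occurred at that intersection during that hour, else 0
dummy_matrix = pd.DataFrame(
    np.random.binomial(1, 0.01, size=(1000, 1371)).astype('int32')
)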
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.utils.data as utils
directory = {path}
crash_matrix = pd.read_pickle(directory + 'matrix_hourly.pkl')
crash_matrix = crash_matrix.astype('int32')
crash_matrix = crash_matrix[:1000]
def PrepareDataset(crash_matrix, BATCH_SIZE=40, seq_len=30, pred_len=1, train_proportion=0.7, valid_proportion=0.2):
    """Prepare training, validation, and testing datasets and dataloaders.

    Convert the crash matrix to training and testing datasets. The vertical
    axis of crash_matrix is the time axis and the horizontal axis is the
    spatial axis.

    Args:
        crash_matrix: a matrix containing spatial-temporal crash data for a network
        seq_len: length of the input sequence
        pred_len: length of the predicted sequence
    Returns:
        Training, validation, and testing dataloaders, plus the maximum
        crash count used for scaling
    """
    np.random.seed(99)
    torch.manual_seed(99)
    time_len = crash_matrix.shape[0]
    max_crash = crash_matrix.max().max()
    crash_matrix = crash_matrix / max_crash
    crash_sequences, crash_labels = [], []
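    # Sliding window: each sample is seq_len consecutive hours across all nodes;
    # its label is the following pred_len hour(s) for the same nodes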
    for i in range(time_len - seq_len - pred_len):
        crash_sequences.append(crash_matrix.iloc[i:i+seq_len].values)
        crash_labels.append(crash_matrix.iloc[i+seq_len:i+seq_len+pred_len].values)
    crash_sequences, crash_labels = np.asarray(crash_sequences), np.asarray(crash_labels)

    # shuffle and split the dataset into training, validation, and testing sets
    sample_size = crash_sequences.shape[0]
    index = np.arange(sample_size, dtype=int)
    np.random.shuffle(index)
    train_index = int(np.floor(sample_size * train_proportion))
    valid_index = int(np.floor(sample_size * (train_proportion + valid_proportion)))
    train_data, train_label = crash_sequences[:train_index], crash_labels[:train_index]
    valid_data, valid_label = crash_sequences[train_index:valid_index], crash_labels[train_index:valid_index]
    test_data, test_label = crash_sequences[valid_index:], crash_labels[valid_index:]
    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)
    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)
    train_dataloader = utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    return train_dataloader, valid_dataloader, test_dataloader, max_crash

train_dataloader, valid_dataloader, test_dataloader, max_crash = PrepareDataset(crash_matrix)
inputs, labels = next(iter(train_dataloader))
[batch_size, step_size, fea_size] = inputs.size()
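# inputs: [batch_size=40, seq_len=30, fea_size=1371]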
input_dim = fea_size
hidden_dim = fea_size
output_dim = fea_size
# LSTM Layer
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
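        # out is [batch, seq_len, hidden]; keep only the last time step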
        out = self.fc(out[:, -1, :])
        return out
model = LSTMModel(input_size=input_dim, hidden_size=hidden_dim, num_layers=1, output_size=1371)
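# nn.CrossEntropyLoss applies log-softmax internally, so the model outputs raw logits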
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
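        # y comes out as [batch, pred_len=1, nodes]; squeeze to [batch, nodes]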
        X, y = X, y.squeeze(1)

        # Compute prediction error
        pred = model(X)
        print(pred.shape)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X, y.squeeze(1)
            print(y.shape)
            pred = model(X)
            print(pred.shape)
            test_loss += loss_fn(pred, y).item()
            print(pred.argmax(1))
            print(y.squeeze(1))
            correct += (pred.argmax(1) == y.squeeze(1)).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: , Avg loss: {test_loss:>8f} \n")

train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
When I run the test, I get very strange predictions and the following error:
torch.Size([40, 1371])
torch.Size([40, 1371])
tensor([ 140, 928, 140, 140, 140, 664, 664, 140, 140, 140, 664, 664,
664, 140, 664, 1209, 664, 664, 664, 664, 664, 1209, 140, 140,
664, 664, 664, 140, 140, 664, 140, 140, 664, 664, 664, 140,
664, 140, 664, 140])
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[115], line 1
----> 1 test(test_dataloader, model, loss_fn)
Cell In[107], line 15
13 print(pred.argmax(1))
14 print(y.squeeze(1))
---> 15 correct += (pred.argmax(1) == y.squeeze(1)).type(torch.float).sum().item()
16 test_loss /= num_batches
17 correct /= size
RuntimeError: The size of tensor a (40) must match the size of tensor b (1371) at non-singleton dimension 1
Thanks.