How do I get class predictions to match the label format for an LSTM classifier?

Hello,

I’m new to PyTorch and am trying to run an LSTM model on a classification problem. I have two classes: 0 for no crash occurred, and 1 for crash occurred. The matrix has 1000 hours (rows) and 1371 nodes (columns), which correspond to intersections of a road network.
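
For context, here is a made-up stand-in with the same shape and value range as my real matrix (my actual data is loaded from a pickle, shown further down):

import numpy as np
import pandas as pd

# hypothetical toy version of my data: 1000 hourly rows, 1371 intersection
# columns, entries are 0 (no crash in that hour at that node) or 1 (crash)
rng = np.random.default_rng(0)
toy_matrix = pd.DataFrame(rng.binomial(1, 0.01, size=(1000, 1371)))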

I’ve repeatedly tried to change the code to make it work, but I keep getting these weird predictions no matter what I try. Could anyone help me identify what I could be doing wrong or point me to a resource that can help me understand the issue, please?

import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.utils.data as utils

directory = {path}
crash_matrix = pd.read_pickle(directory + 'matrix_hourly.pkl')

crash_matrix = crash_matrix.astype('int32')

crash_matrix = crash_matrix[:1000]

def PrepareDataset(crash_matrix, BATCH_SIZE = 40, seq_len = 30, pred_len = 1, train_proportion = 0.7, valid_proportion = 0.2):
    """ Prepare training and testing datasets and dataloaders.
    
    Convert speed/volume/occupancy matrix to training and testing dataset. 
    The vertical axis of speed_matrix is the time axis and the horizontal axis 
    is the spatial axis.
    
    Args:
        speed_matrix: a Matrix containing spatial-temporal speed data for a network
        seq_len: length of input sequence
        pred_len: length of predicted sequence
    Returns:
        Training dataloader
        Testing dataloader
    """
    np.random.seed(99)
    torch.manual_seed(99)
    time_len = crash_matrix.shape[0]
    
    max_crash = crash_matrix.max().max()
    crash_matrix = crash_matrix / max_crash
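    # note: with binary 0/1 entries, max_crash should be 1 (assuming at least
    # one crash in the slice), so this division leaves the values unchanged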
    
    crash_sequences, crash_labels = [], []
    for i in range(time_len - seq_len - pred_len):
        crash_sequences.append(crash_matrix.iloc[i:i+seq_len].values)
        crash_labels.append(crash_matrix.iloc[i+seq_len:i+seq_len+pred_len].values)
    crash_sequences, crash_labels = np.asarray(crash_sequences), np.asarray(crash_labels)
    
    # shuffle and split the dataset into training, validation, and testing sets
    sample_size = crash_sequences.shape[0]
    index = np.arange(sample_size, dtype = int)
    np.random.shuffle(index)
    crash_sequences, crash_labels = crash_sequences[index], crash_labels[index]  # apply the shuffled order
    
    train_index = int(np.floor(sample_size * train_proportion))
    valid_index = int(np.floor(sample_size * (train_proportion + valid_proportion)))
    
    train_data, train_label = crash_sequences[:train_index], crash_labels[:train_index]
    valid_data, valid_label = crash_sequences[train_index:valid_index], crash_labels[train_index:valid_index]
    test_data, test_label = crash_sequences[valid_index:], crash_labels[valid_index:]
    
    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)
    
    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)
    
    train_dataloader = utils.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    
    return train_dataloader, valid_dataloader, test_dataloader, max_crash

train_dataloader, valid_dataloader, test_dataloader, max_crash = PrepareDataset(crash_matrix)

inputs, labels = next(iter(train_dataloader))
[batch_size, step_size, fea_size] = inputs.size()
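# with the defaults above, inputs is [40, 30, 1371] (batch, seq_len, nodes) and labels is [40, 1, 1371]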
input_dim = fea_size
hidden_dim = fea_size
output_dim = fea_size

# LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
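        # out: [batch, seq_len, hidden_size] because batch_first=True; keep only the last time step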
        out = self.fc(out[:, -1, :])
        return out

model = LSTMModel(input_size=input_dim, hidden_size=hidden_dim, num_layers=1, output_size=1371)
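# with output_size=1371, pred will be [batch, 1371]: one raw score per node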

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        y = y.squeeze(1)  # labels come as [batch, 1, nodes]; squeeze to [batch, nodes]

        # Compute prediction error
        pred = model(X)
        print(pred.shape)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            y = y.squeeze(1)
            print(y.shape)
            pred = model(X)
            print(pred.shape)
            test_loss += loss_fn(pred, y).item()
            print(pred.argmax(1))
            print(y.squeeze(1))
            correct += (pred.argmax(1) == y.squeeze(1)).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: , Avg loss: {test_loss:>8f} \n")

train(train_dataloader, model, loss_fn, optimizer)

test(test_dataloader, model, loss_fn)

When I run the test, I get very weird predictions and the following error:

torch.Size([40, 1371])
torch.Size([40, 1371])
tensor([ 140,  928,  140,  140,  140,  664,  664,  140,  140,  140,  664,  664,
         664,  140,  664, 1209,  664,  664,  664,  664,  664, 1209,  140,  140,
         664,  664,  664,  140,  140,  664,  140,  140,  664,  664,  664,  140,
         664,  140,  664,  140])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[115], line 1
----> 1 test(test_dataloader, model, loss_fn)

Cell In[107], line 15
     13         print(pred.argmax(1))
     14         print(y.squeeze(1))
---> 15         correct += (pred.argmax(1) == y.squeeze(1)).type(torch.float).sum().item()
     16 test_loss /= num_batches
     17 correct /= size

RuntimeError: The size of tensor a (40) must match the size of tensor b (1371) at non-singleton dimension 1
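
If it helps narrow things down, I can reproduce the failing comparison with made-up tensors of the same shapes, so it looks like pred.argmax(1) collapses the 1371 node scores into a single class index per sample, while y keeps one 0/1 label per node:

import torch

pred = torch.randn(40, 1371)   # made-up stand-in for the model output
y = torch.zeros(40, 1371)      # made-up stand-in for the squeezed labels

print(pred.argmax(1).shape)    # torch.Size([40]) -- one class index per sample
print(y.shape)                 # torch.Size([40, 1371]) -- one label per node
# (pred.argmax(1) == y)        # raises the same RuntimeError (40 vs 1371 at dim 1)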

Thanks.