LSTM Binary Classification with custom data set - input must have 3 dimensions, got 2 error

user12233 · March 28, 2021, 4:58pm

I’m trying to create a LSTM model that will perform binary classification on a custom dataset. The dataset is a CSV file of about 5,000 records. The features are field 0-16 and the 17th field is the label. I’d like the model to be two layers deep with 128 LSTM cells in each layer. Below is the code that I’m trying to get to run:

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

lr = 0.0005
n_epochs = 10
input_dim = 17    
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50

class FeatureDataSet(torch.utils.data.Dataset):

def __init__(self, fileName):
   fileOut = pd.read_csv(fileName)
   x_train = fileOut.iloc[1:, 0:16].values
   y_train = fileOut.iloc[1:, 17].values
   self.x_train = torch.tensor(x_train, dtype=torch.float32)
   self.y_train = torch.tensor(y_train)
def __len__(self):
    return len(self.y_train)
 def __getitem__(self, idx):
    return self.x_train[idx], self.y_train[idx]

 trainingDataset = FeatureDataSet("dataSetCSV.csv")
 train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
  class RNN(nn.Module):

def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.layer_dim = layer_dim
    self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
    self.fc = nn.Linear(hidden_dim, output_dim)
    self.batch_size = batch_size
    self.hidden = None

def forward(self, x):
    #initializing the hidden states
    h0, c0 = self.init_hidden(x)
    out, (hn, cn) = self.lstm(x, (h0, c0))
    out = self.fc(out[:, -1, :])
    return out

def init_hidden(self, x):
    h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
    c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
    return [t.cpu() for t in (h0, c0)]

 model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
 print(model)
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  print('Start model training')

for epoch in range(1, n_epochs + 1):

    for i, (x_batch, y_batch) in enumerate(train_loader):
        #x_batch = x_batch.reshape(-1, x_batch).to(device)
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
  
        opt.zero_grad()
        out = model(x_batch)
        loss = criterion(out, y_batch)
        loss.backward()
        opt.step()

When I run this code I get the error, “input must have 3 dimensions, got 2”.

ptrblck · March 30, 2021, 5:27am

Could you check the shape of x before feeding it into the self.lstm?
Based on the error it might only have two dimensions, while three are expected ([batch_size, seq_len, nb_features] in your case, since you are using batch_first=True).

user12233 · April 15, 2021, 7:32pm

Thanks for the help after checking the size I realized it was an issue with my dataset. Below is the line of code that I changed in case that helps anyone. I did have to do a little math to get the dimensions to line up.

x_batch = x_batch.reshape(batch_size,  1, input_dim).to(device)