I’m trying to create a LSTM model that will perform binary classification on a custom dataset. The dataset is a CSV file of about 5,000 records. The features are field 0-16 and the 17th field is the label. I’d like the model to be two layers deep with 128 LSTM cells in each layer. Below is the code that I’m trying to get to run:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lr = 0.0005
n_epochs = 10
input_dim = 17
hidden_dim = 128
layer_dim = 2
output_dim = 2
batch_size = 50
class FeatureDataSet(torch.utils.data.Dataset):
def __init__(self, fileName):
fileOut = pd.read_csv(fileName)
x_train = fileOut.iloc[1:, 0:16].values
y_train = fileOut.iloc[1:, 17].values
self.x_train = torch.tensor(x_train, dtype=torch.float32)
self.y_train = torch.tensor(y_train)
def __len__(self):
return len(self.y_train)
def __getitem__(self, idx):
return self.x_train[idx], self.y_train[idx]
trainingDataset = FeatureDataSet("dataSetCSV.csv")
train_loader = torch.utils.data.DataLoader(dataset=trainingDataset, batch_size=batch_size, shuffle=True)
class RNN(nn.Module):
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
super().__init__()
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim
self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim)
self.batch_size = batch_size
self.hidden = None
def forward(self, x):
#initializing the hidden states
h0, c0 = self.init_hidden(x)
out, (hn, cn) = self.lstm(x, (h0, c0))
out = self.fc(out[:, -1, :])
return out
def init_hidden(self, x):
h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
return [t.cpu() for t in (h0, c0)]
model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print('Start model training')
for epoch in range(1, n_epochs + 1):
for i, (x_batch, y_batch) in enumerate(train_loader):
#x_batch = x_batch.reshape(-1, x_batch).to(device)
x_batch = x_batch.to(device)
y_batch = y_batch.to(device)
opt.zero_grad()
out = model(x_batch)
loss = criterion(out, y_batch)
loss.backward()
opt.step()
When I run this code I get the error, “input must have 3 dimensions, got 2”.