I’m building an LSTM model with PyTorch 1.2.0 and having a problem with the shape of the LSTM output when feeding it into the final linear layer in the forward method. The error below actually occurs downstream, when calculating the loss: I’m losing the batch dimension in the model’s predicted output, so the loss is comparing a 2D prediction (seq and features) against a 3D target tensor from the data loader (batch, seq, and features). FYI, I’ve set batch_first=True when initializing the LSTM.
RuntimeError: Expected object of scalar type Float but got scalar type Int for argument #2 ‘target’
However, I think the root cause is that I’m not getting a 3D tensor back from the LSTM in the forward method. I’m seeing a 2D shape (seq and features) with the batch dimension missing. I’m sure it’s something I’m doing (or not doing) correctly upstream, but I’ve been debugging this for hours and just can’t find it.
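For reference, here is what I understand the shapes should look like with batch_first=True (the sizes below are dummy placeholders, not my real data):

import torch
import torch.nn as nn

# Placeholder sizes just to illustrate the shapes I expect (not my real data).
batch, seq_len, n_features, n_hidden = 4, 10, 513, 100

lstm = nn.LSTM(n_features, n_hidden, num_layers=2, batch_first=True)
x = torch.randn(batch, seq_len, n_features)   # (batch, seq, features)
out, (h_n, c_n) = lstm(x)

print(out.shape)   # torch.Size([4, 10, 100]) -> (batch, seq, hidden), i.e. 3D
print(h_n.shape)   # torch.Size([2, 4, 100])  -> (num_layers, batch, hidden)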
Greatly appreciate the help!
Here’s the code:
import torch
import torch.nn as nn
import torch.nn.functional as F

#Define LSTM Class
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, batch_size, num_layers):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        lstm_out, self.hidden = self.lstm(input)
        # tried several things to make this work before finding out the LSTM output
        # was 2D (i.e. view, reshape, contiguous, etc.) No luck.
        y_pred = F.relu(self.linear(lstm_out[-1]))
        return y_pred

#Instantiate LSTM model
input_size = 513
hidden_size = 100
num_layers = 2
output_size = 513

lstm_model = LSTM(input_size, hidden_size, output_size, train_batch_size, num_layers)
lstm_model.to(device)
print(lstm_model)

OUTPUT:
LSTM(
  (lstm): LSTM(513, 100, num_layers=2, batch_first=True)
  (linear): Linear(in_features=100, out_features=513, bias=True)
)

#Train model
learning_rate = 0.001
num_epochs = 5

loss_fn = torch.nn.MSELoss(size_average=False)
optimiser = torch.optim.Adam(lstm_model.parameters(), lr=learning_rate)

for i in range(num_epochs):
    lstm_model.zero_grad()
    lstm_model.hidden = lstm_model.init_hidden()
    for X_train_dl, y_train_dl in train_data_loader:
        y_pred = lstm_model(X_train_dl.cuda())
        loss = loss_fn(y_pred, y_train_dl.cuda())
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
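In case it helps, here’s a minimal shape probe I can run against the model above (batch size 32 and sequence length 20 are just placeholders, not my real loader sizes):

# Placeholder batch size and sequence length (not my real data loader sizes).
dummy = torch.randn(32, 20, input_size).to(device)

lstm_out, _ = lstm_model.lstm(dummy)
print(lstm_out.shape)            # shape straight out of nn.LSTM
print(lstm_out[-1].shape)        # shape after the [-1] indexing used in forward()
print(lstm_model(dummy).shape)   # shape of the final prediction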
I’m running this in Jupyter Notebook 6.0.1.