I am experiencing the same problem: I can't figure out how to declare the input dimension of the Linear layer used in the forward function.
I want to feed the LSTM with batches of 256 sequences, each 20 elements long.
My DataLoader provides inputs of shape [256, 20, 1], i.e. [batch_size, len_sequence, num_features], and my labels are tensors of 256 elements.
I know that by default the LSTM expects input of shape [len_sequence, batch_size, num_features], but I am specifying batch_first=True, so it accepts the batch-first layout directly and its output comes back as [batch_size, len_sequence, hidden_dim].
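For reference, a minimal shape check (with random data standing in for my real batches, and hidden_dim=64 as in my model below) gives exactly these shapes:

import torch
import torch.nn as nn

# dummy batch: [batch_size, len_sequence, num_features]
x = torch.randn(256, 20, 1)

lstm = nn.LSTM(input_size=1, hidden_size=64, num_layers=2, batch_first=True)
out, (h_n, c_n) = lstm(x)

print(out.shape)  # torch.Size([256, 20, 64]) -> [batch, seq, hidden_dim]
print(h_n.shape)  # torch.Size([2, 256, 64])  -> [num_layers, batch, hidden_dim]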
How do I have to declare the Linear layer?
For now I am using the following model and training loop, but the predicted value y_pred has a size of [20*256], which is messing up my loss function.
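If I understand correctly, this happens because nn.Linear is applied over the last dimension only, so every one of the 20 timesteps gets its own prediction before I flatten with view(-1); a quick check with dummy tensors shows the flow:

import torch
import torch.nn as nn

lstm_out = torch.randn(256, 20, 64)  # [batch, seq, hidden_dim], as returned by the LSTM
linear = nn.Linear(64, 1)

y = linear(lstm_out)     # Linear maps the last dim, one output per timestep
print(y.shape)           # torch.Size([256, 20, 1])
print(y.view(-1).shape)  # torch.Size([5120]) == 20 * 256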
import numpy as np
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=64, batch_size=BATCH_SIZE, output_dim=1,
                 num_layers=2, sequence_length=train_window):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length

        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        # Reshape to [batch_size, sequence_length, num_features] and run the LSTM
        lstm_out, self.hidden = self.lstm(input.view(len(input), self.sequence_length, -1))
        # The Linear layer gets applied to every timestep of lstm_out
        y_pred = self.linear(lstm_out)
        return y_pred.view(-1)
model = LSTM()
loss_fn = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.001)

####################
# Train model
####################
num_epochs = 500
tot_iterations = round(len(train_data_normalized) / BATCH_SIZE)
hist = np.zeros(num_epochs)

for t in range(1, num_epochs + 1):
    iteration = 0
    for seq, labels in Train_set:
        iteration += 1

        # Initialise hidden state
        # Don't do this if you want your LSTM to be stateful
        model.hidden = model.init_hidden()

        # Forward pass
        y_pred = model(seq)
        print(y_pred.shape, labels)  # debug print: prediction shape vs labels

        loss = loss_fn(y_pred, labels)
        if iteration % 25 == 0:
            print(f'epoch: {t:1} iteration {iteration:1}/{tot_iterations:3} loss: {loss.item():10.8f}')
        hist[t - 1] = loss.item()  # index with t - 1 since t starts at 1

        # Zero out gradient, else they will accumulate between epochs
        optimiser.zero_grad()

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()
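To make the mismatch concrete: running the LSTM class above on one dummy batch (with batch_size=256 and sequence_length=20 passed explicitly, matching my BATCH_SIZE and train_window) gives a prediction 20 times longer than the labels:

seq = torch.randn(256, 20, 1)  # one batch as produced by my DataLoader
labels = torch.randn(256)      # one label per sequence

model = LSTM(batch_size=256, sequence_length=20)
model.hidden = model.init_hidden()
y_pred = model(seq)

print(y_pred.shape)            # torch.Size([5120])
print(labels.shape)            # torch.Size([256])
# loss_fn(y_pred, labels) then complains because [5120] and [256] do not line up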
Any suggestion would be appreciated, thanks a lot!