```
class MyModel(someBaseModel):
    """GRU followed by a linear head applied to every time step.

    The model consumes a ``PackedSequence`` and returns a plain tensor with
    one row of predictions per (non-padded) time step, in the same row order
    as the packed input's ``data``.

    Args:
        input_size: Number of features per time step (default 20).
        hidden_size: Size of the GRU hidden state (default 50).
        num_layers: Number of stacked GRU layers (default 1).
        num_outputs: Number of values predicted per time step (default 4).
    """

    def __init__(self, input_size=20, hidden_size=50, num_layers=1, num_outputs=4):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.classifier = nn.Linear(hidden_size, num_outputs)

    def forward(self, xb):
        """Return per-time-step predictions for a packed batch.

        Args:
            xb: ``PackedSequence`` whose ``data`` has ``input_size`` features
                per row. Packed data holds one row per real time step, i.e.
                ``sum(lengths)`` rows rather than ``batch_size * max_length``.

        Returns:
            A regular tensor (not a ``PackedSequence``) of shape
            ``(xb.data.size(0), num_outputs)``. Callers that need per-sequence
            views must re-wrap it with the batch sizes and sort indices of
            ``xb``.
        """
        # The final hidden state is not needed; only the per-step outputs are.
        out, _ = self.gru(xb)
        # The linear layer operates on the flat packed rows directly, which
        # avoids computing predictions for padding positions.
        return self.classifier(out.data)
```

My loss calculation and training loop look like this:

```
def training_step(batch, *, device="cuda:2"):
    """Run the model on one batch and compute a per-sequence MSE loss.

    Args:
        batch: Pair ``(inputs, labels)``. ``inputs`` is a ``PackedSequence``;
            ``labels`` is an indexable collection with one tensor per
            sequence. The labels cannot be stacked into a single tensor
            because the sequences have different lengths.
        device: Device the inputs and labels are moved to before the forward
            pass. Defaults to ``"cuda:2"``.

    Returns:
        Tuple ``(loss, acc)``: ``loss`` is a 1-D tensor holding the MSE of
        each sequence in the batch (located on ``device``), and ``acc`` is the
        mean of those values. Despite its name, ``acc`` is a mean loss, not
        an accuracy.
    """
    inputs, labels = batch
    inputs = inputs.to(device)

    # The model returns a plain tensor whose rows line up with ``inputs.data``.
    preds = model(inputs)

    # Re-wrap the predictions so they can be unpacked per sequence. The sort
    # indices must be carried over as well: without them, a batch packed with
    # ``enforce_sorted=False`` would be unpacked in length-sorted order and
    # each prediction would be compared against the wrong label.
    packed_preds = torch.nn.utils.rnn.PackedSequence(
        preds,
        inputs.batch_sizes,
        inputs.sorted_indices,
        inputs.unsorted_indices,
    )
    out_padded, out_lengths = pad_packed_sequence(packed_preds, batch_first=True)

    # Score only the valid (non-padded) prefix of every sequence. Stacking the
    # scalar losses keeps them on the compute device and avoids in-place
    # writes into a pre-allocated CPU tensor.
    per_sequence = [
        F.mse_loss(seq_out[:length], labels[i].to(device))
        for i, (seq_out, length) in enumerate(zip(out_padded, out_lengths.tolist()))
    ]
    loss = torch.stack(per_sequence)
    acc = torch.mean(loss)
    return loss, acc
# Training loop: one optimizer update per batch.
for batch in train_loader:
    # NOTE(review): the training_step shown alongside this loop is written as
    # a plain function taking only `batch`; confirm that
    # `model.training_step` actually resolves to it.
    loss, acc = model.training_step(batch)

    # Store detached values only. Appending the live tensors would keep each
    # batch's autograd graph reachable for as long as the lists exist.
    train_losses.append(torch.mean(loss).detach())
    train_accs.append(acc.detach())

    # A single backward pass over the summed losses. Calling backward() on
    # each element separately fails with "Trying to backward through the
    # graph a second time", because the first call frees the graph the
    # remaining elements still depend on.
    torch.sum(loss).backward()
    optimizer.step()
    optimizer.zero_grad()
```