LSTM - Longitudinal Patient Data

I am trying to predict the risk of an event from longitudinal patient data such as lab results. A patient may have missing labs, or may only have n months of observations where n < max_observed_months. To handle this I used packed sequences. I also want a prediction at each month, so the LSTM should be many-to-many. The problem is that the loss stops decreasing after a certain point, and the AUC is stuck around 0.54.

The data is an n x 5 x 12 array, where n is the number of patients and the 5 features are observed for up to 12 months per patient.
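
For reference, since the LSTM below uses batch_first=True with input_size=5, each padded batch it sees is laid out as (batch, months, features). A tiny synthetic example of that layout (toy values, variable names just for illustration):

import torch
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

# two toy patients: 3 and 2 observed months, 5 lab features per month
patient_a = torch.randn(3, 5)
patient_b = torch.randn(2, 5)

padded = pad_sequence([patient_a, patient_b], batch_first=True)   # (2, 3, 5)
lengths = torch.tensor([3, 2])
packed = pack_padded_sequence(padded, lengths, batch_first=True, enforce_sorted=False)
print(packed.data.shape)   # torch.Size([5, 5]) -> 3 + 2 valid months, 5 features each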

Below is my code. I would really appreciate it if you could provide some tips!

class PatientDataset2(Dataset):
    def __init__(self, data, target) -> None:
        self.data = data        # per-patient sequences of monthly lab features
        self.targets = target   # per-month labels for each patient

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sequence = self.data[index]
        target = self.targets[index]
        # return the sequence, its length, and the per-month targets
        # (note: if the stored sequences are already padded to 12 months, len(sequence) is always 12)
        return sequence, len(sequence), target

def collate_fn(data):
    # sort the batch by sequence length (descending) so it can be packed
    data.sort(key=lambda x: x[1], reverse=True)
    sequences, lengths, targets = zip(*data)

    # zero-pad the sequences and the per-month targets to the longest sequence in the batch
    sequences_padded = pad_sequence([torch.as_tensor(seq) for seq in sequences], batch_first=True)
    targets_padded = pad_sequence([torch.as_tensor(targ) for targ in targets], batch_first=True)

    return sequences_padded, torch.LongTensor(lengths), targets_padded


sequence_length = 12
input_size = 5
hidden_size = 25
num_layers = 2
num_classes = 1
batch_size = 100
num_epochs = 500
learning_rate = 0.001

# Recurrent neural network (many-to-many)
class PtLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(PtLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.1)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x, lengths):
        # pack the padded batch so the LSTM skips the padded months
        packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        lstm_out, hidden = self.lstm(packed)
        # map each LSTM output to a single logit
        outputs = self.fc(lstm_out[0])
        return outputs.squeeze(1)
        

model = PtLSTM(input_size, hidden_size, num_layers, num_classes).to(device)
train_dataset = PatientDataset2(x_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

# Loss and optimizer
# criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([166]).to(device))
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_dataloader)
for epoch in range(num_epochs):
    for i, (seqs, lengths, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        seqs = seqs.float().to(device)
        labels = labels.to(device)
        # pack the labels the same way as the inputs so they line up with the model's packed outputs
        targets = pack_padded_sequence(labels, lengths, batch_first=True)[0]
        # Forward pass
        outputs = model(seqs, lengths)
        loss = criterion(outputs, targets)
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        if (i+1) % 1 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Since you define your nn.LSTM with batch_first=True, after

lstm_out, hidden = self.lstm(packed)

the shape of lstm_out would be (batch_size, seq_len, hidden_size) if you had passed a padded tensor. In that case, lstm_out[0] in the next line would consider only the first sequence in your batch, which would only be OK with batches of size 1. Because you pass a PackedSequence, however, lstm_out is itself a PackedSequence, and lstm_out[0] is its .data tensor of shape (total_valid_time_steps, hidden_size), so make sure that is really what you want to feed to self.fc.
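
One common pattern for getting an explicit prediction per month (just a sketch under your setup, not necessarily the only fix; per_month_logits is only an illustrative name) is to unpack the LSTM output with pad_packed_sequence, apply the final Linear to the padded tensor, and mask out the padded months in the loss:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

def per_month_logits(lstm, fc, x, lengths):
    """Return (batch, seq_len) logits and a boolean mask of the valid (non-padded) months."""
    packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
    packed_out, _ = lstm(packed)
    # back to a padded (batch, seq_len, hidden_size) tensor
    padded_out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
    logits = fc(padded_out).squeeze(-1)            # (batch, seq_len)
    mask = (torch.arange(logits.size(1), device=logits.device)[None, :]
            < out_lengths.to(logits.device)[:, None])
    return logits, mask

# in the training loop:
# logits, mask = per_month_logits(model.lstm, model.fc, seqs, lengths)
# loss = criterion(logits[mask], labels[:, :logits.size(1)][mask].float())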


Unrelated to your question: I'm wondering whether you needed to create the PtLSTM class at all, instead of specifying an output size (different from the hidden size) directly on the LSTM layer?
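
(If that is what you were after: recent PyTorch versions, 1.8 and later if I remember correctly, let you project the per-step LSTM output to a smaller size with the proj_size argument. A rough sketch; using a separate Linear head as in PtLSTM is still the more common pattern:)

import torch.nn as nn

# proj_size projects each step's hidden state down to proj_size features,
# so the (unpacked) output is (batch, seq_len, proj_size) instead of (batch, seq_len, hidden_size)
lstm = nn.LSTM(input_size=5, hidden_size=25, num_layers=2,
               batch_first=True, dropout=0.1, proj_size=1)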