Nondeterministic results of LSTM

When using dropout in a multilayer LSTM trained on GPU, the weights are different across training. I have reset the same seed before training, so the dataloader has the same behaviour and the model has the same initialization. Note that the results are the same across different runs of the whole script, but different across call of the training routine in the same run of the script.

Say if I use Jupyter notebook, and I repetitively run a cell that resets the seed and trains, the results will be different. What else can I do if I want the same result?

Run the script train.py below. CUBLAS_WORKSPACE_CONFIG=:16:8 python train.py

import random
import copy
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset


class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=6, hidden_size=32, num_layers=2, batch_first=True, dropout=0.1)
        self.linear = nn.Linear(32, 2)
    
    def forward(self, inputs):
        outputs, _ = self.lstm(inputs)
        outputs = torch.mean(outputs, 1)
        outputs = self.linear(outputs)
        return outputs

class Data(Dataset):
    def __init__(self):
        super().__init__()
        self.data = np.random.rand(300).astype(np.float32).reshape(10, 5, 6)
        self.labels = np.random.choice(2, 10).astype(np.int64)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]


def train(model, dataloader, criterion, optimizer):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for _, batch in enumerate(dataloader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return

def run_experiment(epochs=2):
    
    train_dataset = Data()
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = NN().to(device)
    
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    
    for _ in range(epochs):
        train(model, train_loader, criterion, optimizer)    
    
    return copy.deepcopy(model)

def compare_models(model_1, model_2):
    models_differ = 0
    for kv_1, kv_2 in zip(model_1.state_dict().items(), model_2.state_dict().items()):
        assert kv_1[0] == kv_2[0]
        if not torch.equal(kv_1[1], kv_2[1]):
            models_differ += 1
            print('Mismtach found: ', kv_1[0])
    if models_differ == 0:
        print('Models match.')

def set_seed(seed):
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

if __name__ == '__main__':

    SEED = 42    
    
    set_seed(SEED)
    m1 = run_experiment()
    
    set_seed(SEED)
    m2 = run_experiment()
    
    compare_models(m1, m2)

I think the deterministic behavior for each script run is expected, but there is no guarantee to get deterministic output inside the script by rerunning the code.
For the cudnn RNN implementations you can see that the dropout state is initialized once in these lines of code, which explains why you would see the same outcome when rerunning the entire script.

Thanks. I will keep this in mind.