Different results on different GPUs

Hi, I’m a bit confused about the reproducibility of LSTMs in PyTorch.

Things I have already done:

import random

import numpy as np
import torch


def setup_seed(seed: int) -> None:
    """Seed the Python, NumPy, and PyTorch (CPU and CUDA) RNGs and force deterministic cuDNN."""
    CUDA = torch.cuda.is_available()
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if CUDA:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

I also set CUBLAS_WORKSPACE_CONFIG before importing torch:

import os
# For reproducibility, this must run before torch is imported.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # noqa

I have 6 GPUs in my machine (CentOS 7, PyTorch 1.7, cudatoolkit 10.2): 2 Tesla V100 32GB, 2 Tesla V100 16GB, and 2 Tesla M40.

On GPUs of the same type the results are identical, but across different GPU types the results differ. Each individual result is reproducible.

Here is my model:

import torch
import torch.nn as nn


class PreEmbeddings(nn.Module):
    """Construct the embeddings from pretrained embeddings."""

    def __init__(self, config, pretrained_embeddings):
        super().__init__()
        pretrained_embeddings = pretrained_embeddings.astype('float32')
        self.word_embeddings = nn.Embedding.from_pretrained(torch.from_numpy(pretrained_embeddings))
        self.dropout = nn.Dropout(config["embed_dropout_prob"])

    def forward(self, input_ids, class_relatedness_ids=None):
        embeddings = self.word_embeddings(input_ids)
        embeddings = self.dropout(embeddings)
        return embeddings


class RelatedEmbeddings(nn.Module):
    """Construct the embeddings from relatedness between words and labels."""

    def __init__(self, config, related_embeddings):
        super().__init__()
        related_embeddings = related_embeddings.astype('float32')
        self.relatedness = nn.Embedding.from_pretrained(torch.from_numpy(related_embeddings))

    def forward(self, input_ids):
        relatedness = torch.mean(self.relatedness(input_ids), dim=1)
        return relatedness


class LSTMClassifier(torch.nn.Module):
    def __init__(self, config, pretrained_embeddings, related_embeddings):
        super().__init__()
        self.config = config
        self.word_embeddings = PreEmbeddings(config, pretrained_embeddings)
        self.relatedness = RelatedEmbeddings(config, related_embeddings)
        self.lstm = nn.LSTM(config["embed_dim"], config["embed_dim"]//2,
                            batch_first=True,
                            bidirectional=True,
                            num_layers=2
                            )
        self.fc1 = nn.Linear(
            config["embed_dim"]//2 + len(config['keywords']) * config['aug'], config["num_classes"])

    def forward(self, input_ids):
        word_embeddings = self.word_embeddings(input_ids)
        relatedness = self.relatedness(input_ids)
        # ht stacks the final hidden states of every layer/direction
        lstm_out, (ht, ct) = self.lstm(word_embeddings)
        if self.config["aug"]:
            # append the word-label relatedness features to the last hidden state
            comb = torch.cat((ht[-1], relatedness), dim=1)
            x = self.fc1(comb)
        else:
            x = self.fc1(ht[-1])
        return x
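
For reference, this is the kind of per-device smoke test one could run with the classes above. The vocabulary size and config values below are placeholders made up purely for illustration; the only real constraint is that the relatedness dimension equals len(config["keywords"]) * config["aug"] so the concatenation matches fc1.

import numpy as np
import torch

# Placeholder sizes/config, for illustration only.
vocab_size = 100
config = {
    "embed_dim": 8,
    "embed_dropout_prob": 0.1,
    "num_classes": 3,
    "keywords": ["k1", "k2", "k3", "k4"],   # relatedness dim = 4
    "aug": 1,
}
pretrained = np.random.randn(vocab_size, config["embed_dim"])
related = np.random.randn(vocab_size, len(config["keywords"]))

device = "cuda:0" if torch.cuda.is_available() else "cpu"
setup_seed(42)
model = LSTMClassifier(config, pretrained, related).to(device).eval()
input_ids = torch.randint(0, vocab_size, (2, 5), device=device)

with torch.no_grad():
    out1 = model(input_ids)
    out2 = model(input_ids)
print(torch.equal(out1, out2))  # True on a single device with the setup above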

Is it possible to get the same result on different GPU types?

I don’t think this is universally possible due to the different hardware architectures.
The reproducibility should work on the same device though, which seems to be the case here.
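
One way to see how large the cross-device gap actually is: save the outputs of the same seeded run on each GPU and compare them on the CPU. The file names below are hypothetical; the point is that bitwise equality usually fails across GPU models while a tolerance-based check passes, which points to ordinary floating-point differences rather than a bug.

import torch

# Hypothetical dumps produced by the same seeded script on two GPU types.
out_v100 = torch.load("logits_v100.pt", map_location="cpu")
out_m40 = torch.load("logits_m40.pt", map_location="cpu")

print(torch.equal(out_v100, out_m40))                            # typically False across GPU models
print(torch.allclose(out_v100, out_m40, rtol=1e-4, atol=1e-5))   # typically True for a forward pass
print((out_v100 - out_m40).abs().max())                          # size of the numerical gap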

I have run into the same problem. In my case, the cuda:0 and cuda:1 devices give different results for an ordinary CNN model.