Hi, I’m a bit confused about the reproducibility of LSTMs in PyTorch.
Things I have already done:
def setup_seed(seed: int) -> None:
    """Seed all RNG sources (Python, NumPy, torch CPU/CUDA) and force
    deterministic cuDNN behavior for reproducible runs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # Disable the autotuner and pin cuDNN to deterministic kernels.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
I also set the CUBLAS_WORKSPACE_CONFIG environment variable before importing torch:
import os
# for reproducibility, must before import torch
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # noqa
I have 6 GPUs in my machine (CentOS 7, PyTorch 1.7, cudatoolkit 10.2): 2 Tesla V100 32GB, 2 Tesla V100 16GB, and 2 Tesla M40.
On GPUs of the same type the results are identical, but across different GPU types the results differ. Each result is individually reproducible.
Here is my model:
class PreEmbeddings(nn.Module):
    """Embedding lookup backed by a frozen pretrained matrix, followed by dropout."""

    def __init__(self, config, pretrained_embeddings):
        super().__init__()
        # from_pretrained freezes the weights by default (no gradient updates).
        weights = torch.from_numpy(pretrained_embeddings.astype('float32'))
        self.word_embeddings = nn.Embedding.from_pretrained(weights)
        self.dropout = nn.Dropout(config["embed_dropout_prob"])

    def forward(self, input_ids, class_relatedness_ids=None):
        # class_relatedness_ids is accepted but unused, preserving the original signature.
        return self.dropout(self.word_embeddings(input_ids))
class RelatedEmbeddings(nn.Module):
    """Look up word-label relatedness vectors and average them over the sequence."""

    def __init__(self, config, related_embeddings):
        super().__init__()
        # config is accepted but unused, preserving the original signature.
        table = torch.from_numpy(related_embeddings.astype('float32'))
        self.relatedness = nn.Embedding.from_pretrained(table)

    def forward(self, input_ids):
        # Mean over the sequence dimension -> one relatedness vector per example.
        return self.relatedness(input_ids).mean(dim=1)
class LSTMClassifier(torch.nn.Module):
    """Bi-LSTM text classifier that optionally concatenates word-label
    relatedness features before the final linear layer (config["aug"])."""

    def __init__(self, config, pretrained_embeddings, related_embeddings):
        super().__init__()
        self.config = config
        self.word_embeddings = PreEmbeddings(config, pretrained_embeddings)
        self.relatedness = RelatedEmbeddings(config, related_embeddings)
        self.lstm = nn.LSTM(
            input_size=config["embed_dim"],
            hidden_size=config["embed_dim"] // 2,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )
        # fc input width: one LSTM direction's hidden size, plus the
        # relatedness features when augmentation is enabled.
        fc_in = config["embed_dim"] // 2 + len(config["keywords"]) * config["aug"]
        self.fc1 = nn.Linear(fc_in, config["num_classes"])

    def forward(self, input_ids):
        embedded = self.word_embeddings(input_ids)
        _, (ht, _) = self.lstm(embedded)
        # NOTE(review): for a bidirectional LSTM, ht[-1] is the last layer's
        # *backward* direction only; the usual choice is to concatenate
        # ht[-2] and ht[-1]. Confirm the single-direction state is intentional
        # (fc1's input width is sized for one direction).
        features = ht[-1]
        if self.config["aug"]:
            features = torch.cat((features, self.relatedness(input_ids)), dim=1)
        return self.fc1(features)
Is it possible to get the same result on different types of GPU?