Hi, I dealt with these issues before, and all my CNNs gave me deterministic results; now that I’m using RNNs, I’m getting non-deterministic results again.
First, all the models are running on the same GPU.
This is my code (in short):
if __name__ == "__main__":
    # Script entry point: seed every RNG, load and split the reviews, build the
    # data loaders, and train the fixed-length LSTM classifier.
    #
    # NOTE: the statements stay at module level (not inside a main() function)
    # because `tok`, `train_dl` and `val_dl` are read as globals by `tokenize`
    # and `train_model`.

    # Local import: only this entry point touches the process environment.
    import os

    args = parser.parse_args()

    # ----- reproducibility -------------------------------------------------
    # The PyTorch documentation warns that RNN/LSTM layers can be
    # non-deterministic on CUDA >= 10.2 unless cuBLAS is given a fixed
    # workspace through this environment variable. It has to be in place
    # before the first CUDA call of the process; setdefault() keeps any value
    # the user already exported.
    os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:2")

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # -----------------------------------------------------------------------

    reviews = pd.read_csv("Reviews.csv")
    # ....... (steps elided in the original post)
    reviews['review_length'] = reviews['review'].apply(lambda x: len(x.split()))
    # Result is discarded; this only matters when run interactively.
    np.mean(reviews['review_length'])
    tok = spacy.load('en_core_web_sm')
    # ....some data manipulation.... (elided in the original post; it is
    # expected to create the 'encoded' column and `vocab_size` used below)

    X = list(reviews['encoded'])
    y = list(reviews['rating'])
    # Alternative that was also tried: a plain positional split,
    # X[:haluka], X[haluka:], y[:haluka], y[haluka:]
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.2, random_state=args.seed
    )

    train_ds = ReviewsDataset(X_train, y_train)
    valid_ds = ReviewsDataset(X_valid, y_valid)

    # A dedicated, seeded generator makes the shuffle order depend only on the
    # seed, not on how many random numbers were drawn earlier (e.g. by model
    # initialisation).
    shuffle_rng = torch.Generator()
    shuffle_rng.manual_seed(args.seed)
    train_dl = DataLoader(
        train_ds, batch_size=args.batch_size, shuffle=True, generator=shuffle_rng
    )
    val_dl = DataLoader(valid_ds, batch_size=args.batch_size)

    model_fixed = LSTM_fixed_len(vocab_size, 50, 50)
    train_model(model_fixed.cuda(), epochs=args.epochs, lr=0.01)
One of my thoughts was that “train_test_split” might be causing it, but even with a straightforward positional split I got non-deterministic results.
I’ll add the functions:
class ReviewsDataset(Dataset):
    """Map-style dataset that pairs encoded reviews with their labels.

    Every entry of ``X`` is indexed as ``X[i][0]`` (a NumPy array, converted to
    an ``int32`` tensor on access) and ``X[i][1]`` (handed back unchanged;
    presumably the un-padded review length -- confirm against the encoder).
    ``Y`` holds one label per entry and defines the dataset length.
    """

    def __init__(self, X, Y):
        self.X, self.y = X, Y

    def __len__(self):
        # The label list is the source of truth for the number of samples.
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(token_tensor, label, X[idx][1])`` for sample ``idx``."""
        sample = self.X[idx]
        token_tensor = torch.from_numpy(sample[0].astype(np.int32))
        return token_tensor, self.y[idx], sample[1]
def tokenize(text):
    """Split ``text`` into lower-cased tokens after stripping noise.

    Runs of non-ASCII characters are replaced by a space, the text is
    lower-cased, and every punctuation mark, digit, carriage return, tab and
    newline is replaced by a space. The cleaned string is then split by the
    tokenizer of the module-level ``tok`` object, and the ``.text`` of each
    token is returned as a list.
    """
    ascii_only = re.sub(r"[^\x00-\x7F]+", " ", text)
    # remove punctuation and numbers
    noise_pattern = '[' + re.escape(string.punctuation) + '0-9\\r\\t\\n]'
    cleaned = re.sub(noise_pattern, " ", ascii_only.lower())
    return [piece.text for piece in tok.tokenizer(cleaned)]
def encode_sentence(text, vocab2index, N=70):
    """Encode ``text`` as a fixed-size array of vocabulary ids.

    The text is tokenized with ``tokenize``; each token is mapped through
    ``vocab2index``, with unknown tokens mapped to ``vocab2index["UNK"]``.
    The ids are written into a zero-filled integer array of length ``N``:
    longer inputs are truncated, shorter ones keep trailing zeros.

    Returns:
        ``(padded_ids, kept)`` where ``kept`` is the number of ids actually
        written, i.e. ``min(N, number_of_tokens)``.
    """
    words = tokenize(text)
    ids = np.array([vocab2index.get(w, vocab2index["UNK"]) for w in words])
    kept = min(N, len(ids))
    padded_ids = np.zeros(N, dtype=int)
    padded_ids[:kept] = ids[:kept]
    return padded_ids, kept
def train_model(model, epochs, lr=0.001):
    """Train ``model`` with Adam and remember the best validation epoch.

    Batches are read from the module-level ``train_dl``; after every epoch the
    model is scored on the module-level ``val_dl`` via ``validation_metrics``.
    Whenever the validation accuracy beats the global ``best_acc``, the globals
    ``best_acc``, ``best_histo`` and ``best_epoch`` are updated.

    Args:
        model: module called as ``model(x, l)`` that returns class logits.
        epochs: number of passes over ``train_dl``.
        lr: Adam learning rate.

    Notes:
        * ``best_acc`` must already be defined at module level, because it is
          read before it is first assigned here.
        * Batches are moved to the device of the model's parameters, so the
          caller decides the device by placing the model.
    """
    global best_acc, best_histo, best_epoch

    trainable = [p for p in model.parameters() if p.requires_grad]
    # Adam raises ValueError on an empty list, so `trainable[0]` below is safe.
    optimizer = torch.optim.Adam(trainable, lr=lr)
    device = trainable[0].device

    for epoch in range(epochs):
        model.train()  # validation_metrics() leaves the model in eval mode
        for x, y, l in train_dl:
            x = x.long().to(device)
            y = y.long().to(device)
            l = l.to(device)

            optimizer.zero_grad()
            y_pred = model(x, l)
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            optimizer.step()

        # validation_metrics() as visible in this file returns
        # (loss, accuracy); other versions may append a histogram as a third
        # item. Accept both instead of failing on a fixed 3-way unpack.
        metrics = validation_metrics(model, val_dl)
        val_acc = metrics[1]
        histogram = metrics[2] if len(metrics) > 2 else None

        if val_acc > best_acc:
            best_acc = val_acc
            best_histo = histogram
            best_epoch = epoch
def validation_metrics(model, valid_dl):
    """Evaluate ``model`` on ``valid_dl`` and return ``(mean_loss, accuracy)``.

    The model is switched to eval mode (and left in it). Evaluation runs under
    ``torch.no_grad()`` so no autograd graph is built for the forward passes.

    Args:
        model: module called as ``model(x, l)`` that returns class logits.
        valid_dl: iterable yielding ``(x, y, l)`` batches of tensors.

    Returns:
        A 2-tuple: the sample-weighted mean cross-entropy loss as a Python
        float, and the accuracy as a 0-dim tensor (``correct / total``).

    Raises:
        ZeroDivisionError: if ``valid_dl`` yields no samples.

    NOTE(review): ``train_model`` in this file unpacks a third ``histogram``
    value from this function; the histogram computation is not part of the
    code shown here, so the return value is kept as a 2-tuple.
    """
    model.eval()

    # Put the batches on the model's device; fall back to CUDA for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    correct = 0
    total = 0
    sum_loss = 0.0
    with torch.no_grad():
        for x, y, l in valid_dl:
            x = x.long().to(device)
            y = y.long().to(device)
            l = l.to(device)
            y_hat = model(x, l)
            loss = F.cross_entropy(y_hat, y)
            pred = torch.max(y_hat, 1)[1]  # index of the largest logit per row
            correct += (pred == y).float().sum()
            total += y.shape[0]
            # cross_entropy averages over the batch; re-weight by batch size so
            # a smaller last batch does not skew the overall mean.
            sum_loss += loss.item() * y.shape[0]
    return sum_loss / total, correct / total
class LSTM_fixed_len(torch.nn.Module):
    """Embedding -> single-layer LSTM -> linear classifier over padded input.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        num_classes: number of output logits (defaults to 5, the value that was
            previously hard-coded).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes=5):
        super().__init__()
        # Index 0 is the padding id: its embedding is zero and gets no gradient.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, l):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: integer tensor of token ids, batch first.
            l: accepted for interface compatibility but not used; the LSTM
                always runs over the full (padded) sequence.
        """
        embedded = self.embeddings(x)
        _, (ht, _) = self.lstm(embedded)
        # ht[-1] is the final hidden state of the last LSTM layer.
        return self.linear(ht[-1])
I tried to make the code as short as possible, but I wanted to show most of it since I don’t know what can make things non-deterministic. I’m an NLP newbie, so maybe it is something in the tokenizing process, or something in the LSTMs/embedding?
I’d be grateful for any help, if you have any clue.
two comments:
- Of course, the different results come from runs that use the same seed.
- Just wanted to clarify that the code is not mine, and I’m not claiming it to be.