Using a seed but still getting different results

Hi,
I have a question: why do I still get different results even though I set a seed?
BATCH_SIZE = 64
MAX_VOCAB_SIZE = 25_000
import torch
import torch.nn as nn
import torch.optim as optim
import time
import random
from torchtext.legacy import data
from torchtext.legacy import datasets
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt

seed=0

random.seed(seed)
np.random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

if seed == 0:
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(torch.cuda.get_device_name(0))

batch_first = True

TEXT.build_vocab(train_data,
                 max_size = MAX_VOCAB_SIZE,
                 vectors = "glove.6B.300d",
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device)

class CNN(nn.Module):

    def __init__(self):

        super().__init__()

class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, pad_idx):

        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)

        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=True,
                           dropout=dropout,
                           )

        self.fc = nn.Linear(hidden_dim * 2, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):

        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'))

        packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)

        # output = [sent len, batch size, hid dim * num directions]
        # output over padding tokens are zero tensors

        # hidden = [num layers * num directions, batch size, hid dim]
        # cell = [num layers * num directions, batch size, hid dim]

        # concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        # and apply dropout
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))

        # embedded = embedded.permute(1,0,2)
        # _, idx = text_lengths.sort(0, descending = True)
        # _, un_idx = t.sort(idx, dim = 0)

        # packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first = True)

        # packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first = True)
        # idx = (torch.cuda.LongTensor(text_lengths) - 1).view(-1, 1).expand(len(text_lengths), output.size(2))
        # time_dimension = 1 if batch_first else 0
        # idx = idx.unsqueeze(time_dimension)
        # if output.is_cuda:
        #     idx = idx.cuda(output.data.get_device())

        # last_output = output.gather(time_dimension, Variable(idx)).squeeze(time_dimension)

        return self.fc(hidden)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 300
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)

UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
def train(model, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        optimizer.zero_grad()

        text, text_lengths = batch.text

        predictions = model(text, text_lengths).squeeze(1)
        label = batch.label - 2
        loss = criterion(predictions, label.squeeze(0))

        acc = binary_accuracy(predictions, label.squeeze(0))

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():

        for batch in iterator:

            text, text_lengths = batch.text

            predictions = model(text, text_lengths).squeeze(1)
            label = batch.label - 2

            loss = criterion(predictions, label.squeeze(0))

            acc = binary_accuracy(predictions, label.squeeze(0))

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
N_EPOCHS = 3

best_valid_loss = float('inf')
test_losss = []
train_losss = []
train_accs = []
test_accs = []
for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    test_losss.append(valid_loss)
    train_losss.append(train_loss)
    train_accs.append(train_acc)
    test_accs.append(valid_acc)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    # plt_train = plt.plot(train_losss)
    # plt_train = plt.plot(train_accs)
    # plt_test = plt.plot(test_losss)
    # plt_test = plt.plot(test_accs)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Check the Reproducibility docs, which mention additional settings such as torch.use_deterministic_algorithms(). With that flag enabled, PyTorch should raise an error whenever a non-deterministic operation is encountered.
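Something like the following is a minimal sketch of a fully seeded setup (assuming PyTorch >= 1.8 for torch.use_deterministic_algorithms(); the CUBLAS_WORKSPACE_CONFIG variable is only needed on CUDA >= 10.2):

import os
import random
import numpy as np
import torch

SEED = 123

# Seed every RNG that is involved (Python, NumPy, CPU, and all GPUs).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Force deterministic cuDNN kernels and disable the benchmark autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Needed for deterministic cuBLAS behavior on CUDA >= 10.2.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Raise an error if an op without a deterministic implementation is used.
torch.use_deterministic_algorithms(True)

Note that this only removes randomness inside PyTorch itself; any shuffling done by your data pipeline also has to be seeded.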

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

BATCH_SIZE = 64
MAX_VOCAB_SIZE = 25_000
batch_first = True
TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device)

class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        
        self.rnn = nn.LSTM(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=True, 
                           dropout=dropout,
                           )
        
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text, text_lengths):
        
        
        
        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'))
        
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
        
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
            
        return self.fc(hidden)
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            BIDIRECTIONAL, 
            DROPOUT, 
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)

UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)



optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() 
    acc = correct.sum() / len(correct)
    return acc
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        text, text_lengths = batch.text
        
        predictions = model(text, text_lengths).squeeze(1)
        label = batch.label  
        loss = criterion(predictions, label.squeeze(0))
        
        acc = binary_accuracy(predictions, label.squeeze(0))
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            text, text_lengths = batch.text
            
            predictions = model(text, text_lengths).squeeze(1)
            label = batch.label  
            
            loss = criterion(predictions,label.squeeze(0))
            
            acc = binary_accuracy(predictions, label.squeeze(0))

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
N_EPOCHS = 5

best_valid_loss = float('inf')
test_losss = [] 
train_losss = [] 
train_accs = [] 
test_accs = [] 
for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    test_losss.append(valid_loss)
    train_losss.append(train_loss)
    train_accs.append(train_acc)
    test_accs.append(valid_acc)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    # plt_train = plt.plot(train_losss)
    # plt_train = plt.plot(train_accs)
    # plt_test = plt.plot(test_losss)
    # plt_test = plt.plot(test_accs)
  
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

It still does not work for me. Since I am using Google Colab and the IMDB dataset takes about 15 minutes to load, I added a separate cell just to load the data:

TEXT = data.Field(tokenize= 'spacy', tokenizer_language='en_core_web_sm',include_lengths = True)
LABEL = data.LabelField(dtype = torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
LABEL.build_vocab(train_data)
print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")
print(LABEL.vocab.stoi)

and

train_data, valid_data = train_data.split(random_state = random.seed(SEED), split_ratio = 0.8)

print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')
import torch 
import torch.nn as nn 
import torch.optim as optim
import time
import random
from torchtext.legacy import data
from torchtext.legacy import datasets
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt
SEED = 123
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  
torch.cuda.manual_seed_all(SEED)
# torch.backends.cudnn.deterministic=True
# torch.backends.cudnn.benchmark = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(torch.cuda.get_device_name(0))

Each part above is its own code cell. I load the data and split it into three parts (train, validation, and test) only once, then run the other code cells a few times, but I still get different results. Please help me.

Based on your code snippet, you are not setting the deterministic cuDNN flags and are not using the proposed torch.use_deterministic_algorithms() call, so non-deterministic results are expected.
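For example, the relevant lines in your seeding cell would look something like this (a sketch; warn_only is only available in newer PyTorch releases):

import os
import torch

# Un-comment the cuDNN flags instead of leaving them commented out:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Required for deterministic cuBLAS on CUDA >= 10.2.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Error out on ops that have no deterministic implementation ...
torch.use_deterministic_algorithms(True)
# ... or, in newer releases, only warn instead of raising:
# torch.use_deterministic_algorithms(True, warn_only=True)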

How can I fix it? Thank you very much.