Why is my code still not deterministic?

Hi, I dealt with these issues before and all my CNNs gave me deterministic results, but now that I’m using RNNs I’m getting non-deterministic results again.
First of all, the models are all running on the same GPU.
This is my code (in short):

if __name__ == "__main__":
    args = parser.parse_args()
    #####
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(args.seed)
    #####

    reviews = pd.read_csv("Reviews.csv")
    .......
    reviews['review_length'] = reviews['review'].apply(lambda x: len(x.split()))
    np.mean(reviews['review_length'])
    tok = spacy.load('en_core_web_sm')


   ....some data manipulation....
    X = list(reviews['encoded'])
    y = list(reviews['rating'])
   
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=args.seed)#X[:haluka],X[haluka:],y[:haluka],y[haluka:]#
    train_ds = ReviewsDataset(X_train, y_train)
    valid_ds = ReviewsDataset(X_valid, y_valid)

    train_dl = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
    val_dl = DataLoader(valid_ds, batch_size=args.batch_size)
    model_fixed = LSTM_fixed_len(vocab_size, 50, 50)

    train_model(model_fixed.cuda(), epochs=args.epochs, lr=0.01)

One of my thoughts was that “train_test_split” may cause it, but even with straightforward splitting I got non-deterministic results.
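
To be concrete, by straightforward splitting I mean a plain slice instead of “train_test_split”, roughly like this (just a sketch of the idea, with the split point at 80% to match test_size=0.2):

split = int(0.8 * len(X))                # same 80/20 ratio as test_size=0.2
X_train, X_valid = X[:split], X[split:]
y_train, y_valid = y[:split], y[split:]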

I’ll add the functions:

class ReviewsDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.y = Y
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return torch.from_numpy(self.X[idx][0].astype(np.int32)), self.y[idx], self.X[idx][1]
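
Each element of X here is the (encoded, length) pair produced by encode_sentence below, so a dataset item is (padded index tensor, rating, true length); a tiny made-up example:

ds = ReviewsDataset([(np.array([2, 3, 0, 0]), 2)], [4])   # one fake review, rating 4
x, y, l = ds[0]                                           # x = tensor([2, 3, 0, 0]), y = 4, l = 2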

def tokenize(text):
    text = re.sub(r"[^\x00-\x7F]+", " ", text)
    regex = re.compile('[' + re.escape(string.punctuation) + '0-9\\r\\t\\n]')  # remove punctuation and numbers
    nopunct = regex.sub(" ", text.lower())
    return [token.text for token in tok.tokenizer(nopunct)]

def encode_sentence(text, vocab2index, N=70):
    tokenized = tokenize(text)
    encoded = np.zeros(N, dtype=int)
    enc1 = np.array([vocab2index.get(word, vocab2index["UNK"]) for word in tokenized])
    length = min(N, len(enc1))
    encoded[:length] = enc1[:length]
    return encoded, length
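
For context, encode_sentence pads or truncates every review to N indices and also returns the true length; with a toy vocabulary (made up here, just for illustration):

vocab2index = {"UNK": 1, "great": 2, "movie": 3}                 # toy vocab; 0 is reserved for padding
enc, length = encode_sentence("Great movie", vocab2index, N=10)
# enc = [2, 3, 0, 0, 0, 0, 0, 0, 0, 0], length = 2; unknown words map to vocab2index["UNK"]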

def train_model(model, epochs, lr=0.001):
    global best_acc, best_histo, best_epoch
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=lr)
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for batch_idx,(x, y, l) in enumerate(train_dl):
            x = x.long().cuda()
            y = y.long().cuda()
            l = l.cuda()
            y_pred = model(x,l)
            optimizer.zero_grad()
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item() * y.shape[0]
            total += y.shape[0]
        val_loss, val_acc, histogram  = validation_metrics(model, val_dl)
        if val_acc > best_acc:
            best_acc = val_acc
            best_histo = histogram
            best_epoch = i



def validation_metrics(model, valid_dl):
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0.0
    sum_rmse = 0.0  # unused here; the rmse part was trimmed from this excerpt
    for x, y, l in valid_dl:
        x = x.long().cuda()
        y = y.long().cuda()
        l = l.cuda()
        y_hat = model(x, l)
        loss = F.cross_entropy(y_hat, y)
        pred = torch.max(y_hat, 1)[1]
        correct += (pred == y).float().sum()
        total += y.shape[0]
        sum_loss += loss.item() * y.shape[0]
        
    return sum_loss / total, correct / total, None  # histogram computation trimmed from this excerpt (train_model unpacks three values)


class LSTM_fixed_len(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 5)

    def forward(self, x, l):
        x = self.embeddings(x)
        lstm_out, (ht, ct) = self.lstm(x)
        return self.linear(ht[-1])
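
For reference, here is the shape contract of the fixed-length model (it ignores the lengths l) with toy sizes; just a CPU sanity check, not part of the training script:

m = LSTM_fixed_len(vocab_size=100, embedding_dim=50, hidden_dim=50)
x = torch.randint(0, 100, (4, 70))     # a batch of 4 reviews, each padded to 70 token indices
l = torch.tensor([70, 42, 13, 70])     # lengths (unused by this fixed-length variant)
print(m(x, l).shape)                   # torch.Size([4, 5]) -> logits over the 5 rating classes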

I tried to make the code as short as possible, but I wanted to show most of it since I don’t know what can make things non-deterministic. I’m an NLP newbie, so maybe it’s something in the tokenizing process, or something in the LSTM/embedding?

I’ll be happy for your help if you have any clue.
Two comments:

  1. Of course, the different results come from the same seed.
  2. Just wanted to clarify that the code is not mine, and I’m not claiming it to be.

Could you just use your nn.LSTM module and check if the outputs are deterministic or not?
If not, which CUDA and cuDNN versions are you using?
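
You can print them with something like:

import torch
print(torch.__version__)               # PyTorch version
print(torch.version.cuda)              # CUDA version this PyTorch build uses
print(torch.backends.cudnn.version())  # cuDNN version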

Ok, I did:

class LSTM_fixed_len(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(3, 3, batch_first=True)

    def forward(self, x, l):
        lstm_out, (ht, ct) = self.lstm(x)
        return ht[-1]

#####
torch.manual_seed(60)
torch.cuda.manual_seed(60)
np.random.seed(60)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(60)
#####
vec = torch.normal(torch.zeros(2, 2, 3), torch.ones(2, 2, 3))   # random (batch, seq, feature) input
vec1 = torch.normal(torch.zeros(2, 2, 3), torch.ones(2, 2, 3))  # passed as the (unused) l argument

model = LSTM_fixed_len()  # note: the model and inputs stay on the CPU in this little test
out = model(vec, vec1)
print(out)

And I got deterministic results.
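
To be explicit, by deterministic I mean that re-running the script prints exactly the same tensor every time; an equivalent in-process check (re-seeding before each run, just a sketch) would be:

def run_once():
    torch.manual_seed(60)                     # re-seed so both runs start identically
    torch.cuda.manual_seed(60)
    model = LSTM_fixed_len()
    x = torch.normal(torch.zeros(2, 2, 3), torch.ones(2, 2, 3))
    return model(x, x)

print(torch.allclose(run_once(), run_once()))  # True, i.e. this tiny LSTM is deterministic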

What is our next step?