I tried to create a Transformer-based Seq2Seq generation using greedy search. Once I ran the code below, the outputs are exactly the same. That’s good; nothing is wrong with this. However, if I run it again, the result is completely different.
Anyone knows what might cause this?
for _ in range(10): # Do not forget this "loop" line.
import math
import os
from collections import namedtuple
import random
import numpy as np
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from transformers import BertTokenizer
from torch.utils.data import Dataset, DataLoader
from src.models.transformer import Transformer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', additional_special_tokens=['_eos', '_go'])
args = {
'max_src_len': 300,
'max_tgt_len': 50,
'batch_size': 64,
'vocab_size': len(tokenizer),
'hidden_size': 512,
'dropout': 0.2,
'num_layers': 4,
'num_heads': 8,
'sos_idx': tokenizer.encode('_go', add_special_tokens=False)[0],
'eos_idx': tokenizer.encode('_eos', add_special_tokens=False)[0],
'pad_idx': tokenizer.encode('[PAD]', add_special_tokens=False)[0],
}
args = namedtuple('args', args.keys())(*args.values())
# Helper function
def convert_att_into_mask(mask):
return mask.bool().masked_fill(mask == 0, True).masked_fill(mask == 1, False)
# Transformer Generatior - Pretrained
gen_weight_path = '/data/vitou/100DaysofCode/conv_agent/checkpoints/pretrained/transformer_generative.pt'
generator = Transformer(args).to(device)
generator.load_state_dict(torch.load(gen_weight_path))
generator.eval()
tokenizer_config = {
'add_special_tokens' : False,
'return_token_type_ids' : False,
'return_tensors' : 'pt',
'padding': True
}
with torch.no_grad():
ctx_text = "hello. do you play any video games? _eos"
src = tokenizer(ctx_text, **tokenizer_config)
src_input_ids, src_att_mask = [ x.to(device) for x in src.values() ]
src_input_ids = torch.transpose(src_input_ids, 0, 1).contiguous()
src_att_mask = convert_att_into_mask(src_att_mask)
res = generator.generate(src_input_ids, src_att_mask[:1])
print ("[context]: ", ctx_text)
print ("[greedy]: ", tokenizer.decode(res))
print ("-------------------------------------------")
Output #1
[context]: hello. do you play any video games? _eos
[greedy]: of the games. i don't really watch much tv. i don't watch much tv. you? _go people like to watch tv. _go of the time. _go people do. _go of the sports. _go of the actors
-------------------------------------------
[context]: hello. do you play any video games? _eos
[greedy]: of the games. i don't really watch much tv. i don't watch much tv. you? _go people like to watch tv. _go of the time. _go people do. _go of the sports. _go of the actors
-------------------------------------------
[x8] more of this (exactly the same for each iteration)
Output #2
[context]: hello. do you play any video games? _eos
[greedy]: i do, i love the simpsons! _eos
-------------------------------------------
[context]: hello. do you play any video games? _eos
[greedy]: i do, i love the simpsons! _eos
-------------------------------------------
[x8] more of this (exactly the same for each iteration)
So the output I got is either one of the two outputs above.