Hello.
I am trying to make a Transformer model. Now I want to take the output tensor and convert it into a sentence, but I have no idea how to get the output.
I tried to get the output by referring to this repository.
This is the code of Transformer model.
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer, TransformerDecoder, TransformerDecoderLayer
class TransformerModel(nn.Module):
    """Seq2seq Transformer: token embeddings + positional encoding feeding a
    TransformerEncoder/TransformerDecoder pair.

    NOTE(review): there is no final linear projection back to vocabulary
    size, so forward() returns decoder feature vectors of width ``ninp``,
    not vocabulary logits — confirm whether a generator head is applied
    elsewhere before argmax-ing over this output.
    """

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        """ntoken: vocab size; ninp: embedding width; nhead: attention heads;
        nhid: feed-forward width; nlayers: encoder/decoder layer count."""
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        # Source and target token embeddings (both map vocab -> ninp).
        # Names kept from the original: `encoder` embeds source tokens,
        # `decoder` embeds target tokens.
        self.encoder = nn.Embedding(ntoken, ninp)
        self.decoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
        self.init_weights()

    def generate_square_subsequent_mask(self, sz):
        """Causal (sz, sz) mask: position i may attend only to positions <= i.

        Allowed positions are 0.0, disallowed positions are -inf (added to
        attention scores before softmax).
        """
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        """Uniformly initialize both embedding tables in [-0.1, 0.1]."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, trg):
        """Run src through the encoder and trg through the masked decoder.

        src, trg: LongTensors of token indices, sequence-first per the
        nn.Transformer convention (seq_len, batch) — TODO confirm callers
        pass that layout.
        """
        # BUG FIX: the original called the *global* `model` and `device`
        # here; use `self` and derive the device from the input tensor so
        # the module is self-contained.
        trg_mask = self.generate_square_subsequent_mask(trg.size(0)).to(trg.device)
        src = self.encoder(src)
        trg = self.decoder(trg)
        src = self.pos_encoder(src)
        trg = self.pos_encoder(trg)
        memory = self.transformer_encoder(src)
        output = self.transformer_decoder(trg, memory, tgt_mask=trg_mask)
        return output
This is the code with which I am trying to generate a sentence.
def gen_sentence(sentence, src_field, trg_field, model, max_len=50):
    """Greedy-decode a translation of `sentence` with `model`.

    Returns the list of generated target tokens (including <sos>/<eos>).
    Relies on globals `tokenizer` and `device` defined elsewhere in the
    script. Decoding stops at <eos> or after `max_len` steps.

    NOTE(review): the model has no final linear layer projecting decoder
    features to vocabulary size, so argmax over the raw decoder output is
    only meaningful once such a generator head exists — confirm.
    """
    model.eval()
    tokens = [src_field.init_token] + tokenizer(sentence) + [src_field.eos_token]
    src_index = [src_field.vocab.stoi[i] for i in tokens]
    # nn.Transformer modules expect (seq_len, batch); unsqueeze(1) makes a
    # batch of one. (The original unsqueeze(0) produced (1, seq_len).)
    src_tensor = torch.LongTensor(src_index).unsqueeze(1).to(device)
    with torch.no_grad():
        # Fixed `mode.pos_encoder` typo from the original.
        enc_output = model.transformer_encoder(
            model.pos_encoder(model.encoder(src_tensor)))
    trg_index = [trg_field.vocab.stoi[trg_field.init_token]]
    for _ in range(max_len):
        # Re-feed the whole generated prefix each step — the decoder is not
        # incremental. The original fed only the last token and built it
        # with torch.LongTensor(int), which allocates an *uninitialized*
        # tensor of that size rather than a tensor holding the token id.
        trg_tensor = torch.LongTensor(trg_index).unsqueeze(1).to(device)
        trg_mask = model.generate_square_subsequent_mask(trg_tensor.size(0)).to(device)
        with torch.no_grad():
            # Target tokens go through the *target* embedding
            # (`model.decoder`), not `model.encoder` as in the original;
            # also fixed the `transformer_deocder` typo.
            emb = model.pos_encoder(model.decoder(trg_tensor))
            output = model.transformer_decoder(emb, enc_output, tgt_mask=trg_mask)
        # Prediction for the last time step, batch element 0.
        pred_token = output[-1, 0].argmax(-1).item()
        trg_index.append(pred_token)
        if pred_token == trg_field.vocab.stoi[trg_field.eos_token]:
            break
    trg_tokens = [trg_field.vocab.itos[i] for i in trg_index]
    return trg_tokens
def gen_sentence_list(path):
    """Read a tab-separated file of (source, target) pairs and generate a
    prediction for every source sentence.

    Returns three parallel lists: source sentences, reference targets, and
    model predictions. Relies on globals `SRC` and `model`.
    """
    # Renamed from `input`/`output`, which shadowed Python builtins.
    sources, targets, preds = [], [], []
    with open(path, mode='r') as f:
        for line in f:
            # rstrip('\n') so the target column does not keep its trailing
            # newline (the original left it in).
            cols = line.rstrip('\n').split('\t')
            sources.append(cols[0])
            targets.append(cols[1])
    for sentence in sources:
        preds.append(gen_sentence(sentence, SRC, SRC, model))
    return sources, targets, preds