I want to build a text encoder-decoder based on nn.TransformerEncoder and nn.TransformerDecoder.
Given a sentence as input to the encoder, I want to get a vector representation of size 270, and in the decoder I want to reconstruct the original text from that vector representation.
import torch
import torch.nn as nn

word_count = 10000  # vocabulary size; placeholder value for this example

class TransformerEncoder(nn.Module):
    def __init__(self, input_size, num_head, hidden_size, num_layers):
        super(TransformerEncoder, self).__init__()
        self.embd = nn.Embedding(word_count, input_size)
        encoder_layer = nn.TransformerEncoderLayer(input_size, num_head, hidden_size)
        self.transformer_enc = nn.TransformerEncoder(encoder_layer, num_layers)
        self.linear1 = nn.Linear(input_size, 270)

    def forward(self, x):
        x = x.long()                     # token indices for the embedding lookup
        emb = self.embd(x)               # (seq_len, batch, input_size)
        mem = self.transformer_enc(emb)  # encoder memory, same shape as emb
        out = self.linear1(mem)          # per-token projection to 270 dims
        return out, mem
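
One thing I noticed while writing this: linear1 is applied per token, so out has shape (seq_len, batch, 270) rather than being a single 270-dim vector for the whole sentence. If one vector per sentence is needed, I assume something like pooling over the sequence dimension would be required (hypothetical, not part of my code yet):

    sentence_vec = out.mean(dim=0)  # (batch, 270): average the per-token outputs over the sequence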
class TransformerDecoder(nn.Module):
    def __init__(self, input_size, num_head, output_size, hidden_size, num_layers):
        super(TransformerDecoder, self).__init__()
        decoder_layer = nn.TransformerDecoderLayer(input_size, num_head, hidden_size)
        self.transformer_dec = nn.TransformerDecoder(decoder_layer, num_layers)
        self.linear1 = nn.Linear(input_size, output_size)

    def forward(self, x, mem):
        out = self.transformer_dec(x, mem)  # x: embedded target sequence, mem: encoder memory
        out = self.linear1(out)             # project to output_size (e.g. vocabulary logits)
        return out
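
For reference, this is roughly how I intend to wire the two modules together; all the sizes below are placeholders I picked just to exercise the code, not tuned values:

    # Placeholder hyperparameters, chosen only to run the modules end to end.
    enc = TransformerEncoder(input_size=256, num_head=8, hidden_size=512, num_layers=3)
    dec = TransformerDecoder(input_size=256, num_head=8, output_size=word_count,
                             hidden_size=512, num_layers=3)

    src = torch.randint(0, word_count, (20, 4))            # (seq_len=20, batch=4) token ids
    vec, mem = enc(src)                                    # vec: (20, 4, 270), mem: (20, 4, 256)
    tgt = enc.embd(torch.randint(0, word_count, (20, 4)))  # embed target tokens for the decoder
    logits = dec(tgt, mem)                                 # (20, 4, word_count)

Reusing the encoder's embedding for the decoder input is just a shortcut here; I realize a separate target embedding may be the usual choice.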
I am wondering whether my implementation is correct, since I have not used positional encoding or masking. Is it mandatory to use positional encoding and masking when working with a Transformer? Is my implementation correct as it stands?
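
For context, this is the kind of sinusoidal positional encoding (following "Attention Is All You Need") and causal target mask I would add if they turn out to be required; the class and helper names here are my own, not from my code above:

    import math

    class PositionalEncoding(nn.Module):
        # Standard sinusoidal positional encoding.
        def __init__(self, d_model, max_len=5000):
            super(PositionalEncoding, self).__init__()
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
            pe = torch.zeros(max_len, 1, d_model)
            pe[:, 0, 0::2] = torch.sin(position * div_term)
            pe[:, 0, 1::2] = torch.cos(position * div_term)
            self.register_buffer('pe', pe)  # (max_len, 1, d_model), broadcasts over the batch

        def forward(self, x):
            # x: (seq_len, batch, d_model); add the position signal to every token embedding.
            return x + self.pe[:x.size(0)]

    def causal_mask(sz):
        # Upper-triangular -inf mask so position i cannot attend to positions > i.
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

If I understand correctly, the positional encoding would be applied right after each embedding lookup, and the mask would be passed to the decoder as self.transformer_dec(x, mem, tgt_mask=causal_mask(x.size(0))).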