In the PyTorch Seq2Seq tutorial on the PyTorch website, the decoder has an extra ReLU step between the Embedding and the GRU, but the encoder does not. Why is that? Does it make any difference? Can anyone elaborate?
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)  # <-- the extra ReLU in question
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
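For what it's worth, that line in isolation just clamps the negative components of the embedded vector to zero before the GRU sees it. A minimal sketch with made-up values (not from the tutorial):

import torch
import torch.nn.functional as F

# Pretend output of self.embedding(input).view(1, 1, -1), shown flat for readability.
emb = torch.tensor([[-0.5, 0.3, -1.2, 0.8]])
print(F.relu(emb))  # tensor([[0.0000, 0.3000, 0.0000, 0.8000]]) - negatives zeroed

So some embedding dimensions can end up at exactly zero, which is why I'd expect it to make some difference; I just don't see why only the decoder would want that.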
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded  # no ReLU here, unlike the decoder
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
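For completeness, a quick smoke test comparing one step through each module (hidden_size and vocab_size are arbitrary choices of mine; torch, device, and the classes are as defined above):

hidden_size, vocab_size = 8, 10
encoder = EncoderRNN(vocab_size, hidden_size).to(device)
decoder = DecoderRNN(hidden_size, vocab_size).to(device)

token = torch.tensor([[3]], device=device)  # a single token index, batch of 1
enc_out, enc_hidden = encoder(token, encoder.initHidden())
dec_out, dec_hidden = decoder(token, enc_hidden)

print(enc_out.shape)  # torch.Size([1, 1, 8])  - raw GRU output
print(dec_out.shape)  # torch.Size([1, 10])    - log-probabilities over the vocabulary

Apart from that single F.relu call (and the output layer the decoder needs anyway), the two forward passes look identical to me.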