Hello!
I am a beginner in PyTorch and have a question about writing a custom neural network class with two methods, `__init__()` and `forward()`. I get different results when I keep an unused layer in `__init__()` (i.e., a layer that is never called in `forward()`) compared with the results I get after removing that layer from `__init__()`. Is this caused by the `model.parameters()` passed to the optimizer, or is there some other reason?
Thank you!
The two models that produce different outputs are:

With the unused layer (`self.lstm_attention3`):
class NeuralNet4(nn.Module):
    """Bidirectional LSTM -> GRU text model with mean/max pooling and a small head.

    This variant also constructs ``self.lstm_attention3``, which ``forward``
    never calls.

    NOTE(review): an unused submodule is not necessarily inert. If
    ``Attention`` draws random initial weights, building it advances the
    global RNG, so the layers created after it (``linear3`` and ``out3``)
    start from different values than in a model that omits it. Any parameters
    it registers are also returned by ``model.parameters()``. Confirm against
    the ``Attention`` definition, which is not visible here.
    """

    def __init__(self):
        # Zero-argument super(): two classes in this file share the name
        # NeuralNet4, so looking the class up by its global name is fragile.
        super().__init__()
        hidden_size = 60
        fc_layer = 16

        # Built but not called in forward().
        self.GaussianNoise = GaussianNoise(std=0.1, mean=0)
        self.dropout3 = nn.Dropout(0.1)
        self.embedding_dropout3 = nn.Dropout2d(0.1)

        # Embedding table overwritten with the fastText matrix and frozen.
        self.embedding3 = nn.Embedding(max_features, embed_size)
        self.embedding3.weight = nn.Parameter(
            torch.tensor(fasttext_embeddings, dtype=torch.float32),
            requires_grad=False,
        )

        self.lstm3 = nn.LSTM(
            embed_size, hidden_size, bidirectional=True, batch_first=True
        )
        # A bidirectional LSTM emits 2 * hidden_size features per step.
        self.gru3 = nn.GRU(
            hidden_size * 2, hidden_size, bidirectional=True, batch_first=True
        )

        # Unused layer under discussion: constructed, never called in forward().
        self.lstm_attention3 = Attention(hidden_size * 2, maxlen)

        self.relu3 = nn.ReLU()
        # Input width = 2 directions * hidden_size * 2 pooled vectors (= 240).
        # Original author note, meaning unclear: "481-60, 801-100".
        self.linear3 = nn.Linear(hidden_size * 4, fc_layer)
        self.out3 = nn.Linear(fc_layer, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the model on a batch of embedding indices.

        Args:
            x: Integer indices for ``self.embedding3``; the recurrent layers
                use ``batch_first=True``, so presumably shaped
                (batch, seq_len) -- confirm against the data loader.

        Returns:
            Output of the final linear layer, with last dimension 1.
        """
        h_embedding3 = self.embedding3(x)
        h_lstm3, _ = self.lstm3(h_embedding3)
        h_gru3, _ = self.gru3(h_lstm3)

        # Pool over dim 1 (the step dimension when batch_first=True).
        avg_pool3 = torch.mean(h_gru3, 1)
        max_pool3, _ = torch.max(h_gru3, 1)

        conc3 = torch.cat((avg_pool3, max_pool3), 1)
        conc3 = self.relu3(self.linear3(conc3))
        out = self.out3(conc3)
        return out
Without the unused layer:
class NeuralNet4(nn.Module):
    """Bidirectional LSTM -> GRU text model with mean/max pooling and a small head.

    This variant builds no attention layer; every parameterised layer created
    in ``__init__`` is used by ``forward``.
    """

    def __init__(self):
        # Zero-argument super(): two classes in this file share the name
        # NeuralNet4, so looking the class up by its global name is fragile.
        super().__init__()
        hidden_size = 60
        fc_layer = 16

        # Built but not called in forward().
        self.GaussianNoise = GaussianNoise(std=0.1, mean=0)
        self.dropout = nn.Dropout(0.1)
        self.embedding_dropout = nn.Dropout2d(0.1)

        # Embedding table overwritten with the fastText matrix and frozen.
        self.embedding = nn.Embedding(max_features, embed_size)
        self.embedding.weight = nn.Parameter(
            torch.tensor(fasttext_embeddings, dtype=torch.float32),
            requires_grad=False,
        )

        self.lstm = nn.LSTM(
            embed_size, hidden_size, bidirectional=True, batch_first=True
        )
        # A bidirectional LSTM emits 2 * hidden_size features per step.
        self.gru = nn.GRU(
            hidden_size * 2, hidden_size, bidirectional=True, batch_first=True
        )

        self.relu = nn.ReLU()
        # Input width = 2 directions * hidden_size * 2 pooled vectors (= 240).
        # Original author note, meaning unclear: "481-60, 801-100".
        self.linear = nn.Linear(hidden_size * 4, fc_layer)
        self.out = nn.Linear(fc_layer, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the model on a batch of embedding indices.

        Args:
            x: Integer indices for ``self.embedding``; the recurrent layers
                use ``batch_first=True``, so presumably shaped
                (batch, seq_len) -- confirm against the data loader.

        Returns:
            Output of the final linear layer, with last dimension 1.
        """
        h_embedding = self.embedding(x)
        h_lstm, _ = self.lstm(h_embedding)
        h_gru, _ = self.gru(h_lstm)

        # Pool over dim 1 (the step dimension when batch_first=True).
        avg_pool = torch.mean(h_gru, 1)
        max_pool, _ = torch.max(h_gru, 1)

        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        out = self.out(conc)
        return out