Hi,
My network works fine up to the `labels_correct = self.linear_labels_correct(dense_out)` line in the attached code. As soon as I add another linear layer in parallel that takes the same input `dense_out`, I get `loss.grad = None` and `p.grad = None` for every parameter `p` in `model.parameters()`. I am new to PyTorch and deep learning, and I don't understand what is happening. It would be great to get some direction on this!
x_sent = self.embeddings(input_sent)
x_char = self.embeddings(input_char)
h_sent, (h_t_sent, c_t_sent) = self.lstm_sent(x_sent,(h0_sent, c0_sent))
linear_sent = self.linear_sent(h_sent)
linear_char = self.linear_char(x_char)
linear_concat = torch.cat((linear_sent, linear_char), 1)
linear_sum = torch.sum(linear_concat, dim=1)
dense_out = F.relu(linear_sum)
labels_correct = self.linear_labels_correct(dense_out)
labels_incorrect = self.linear_labels_incorrect(dense_out)
labels_stacked = torch.stack([labels_correct, labels_incorrect], dim = -1)
labels_softmax = torch.nn.functional.softmax(labels_stacked, dim = -1)
labels_correct_softmax, labels_incorrect_softmax = torch.unbind(labels_softmax, dim=-1)
labels_correct_pred = Variable(labels_correct_softmax, requires_grad=True)
return labels_correct_pred
This is the `__init__` function for the network:
def __init__(self, obj, vocab_limit, embedding_dim, hidden_dim, num_layers, batch_size):
    """Build the embedding, LSTM and linear layers of the model.

    Args:
        obj: Object whose ``train_weights_matrix`` attribute is handed to
            ``create_emb_layer`` to build the embedding layer.
        vocab_limit: Accepted but not used in this constructor.
        embedding_dim: Input size of the LSTM and of ``linear_char``.
        hidden_dim: Hidden size of the LSTM (per direction).
        num_layers: Number of stacked LSTM layers.
        batch_size: Accepted but not used in this constructor.
    """
    super(Model, self).__init__()

    # Keep the hyper-parameters on the instance.
    self.num_layers = num_layers
    self.hidden_dim = hidden_dim
    self.embedding_dim = embedding_dim

    # create_emb_layer returns a pair; only the layer itself is kept.
    emb_layer, _num_embeddings = create_emb_layer(obj.train_weights_matrix, 0, False)
    self.embeddings = emb_layer

    self.lstm_sent = nn.LSTM(
        input_size=embedding_dim,
        hidden_size=hidden_dim,
        num_layers=num_layers,
        batch_first=True,
        bidirectional=True,
    )

    # The LSTM is bidirectional, so its output features are 2 * hidden_dim wide.
    projection_width = 20
    self.linear_sent = nn.Linear(hidden_dim * 2, projection_width)
    self.linear_char = nn.Linear(embedding_dim, projection_width)

    # NOTE(review): label_size is neither a parameter nor assigned in this
    # method; presumably a module-level name -- confirm it is defined.
    self.linear_labels_correct = nn.Linear(projection_width, label_size)
    self.linear_labels_incorrect = nn.Linear(projection_width, label_size)