Getting p.grad = None for all parameters in a bidirectional LSTM network

RGaonkar · August 10, 2018, 7:15am

Hi,

My network works fine up to the labels_correct = self.linear_labels_correct(dense_out) line in the attached code. As soon as I add another linear layer in parallel that takes the same input “dense_out”, I start getting loss.grad = None and p.grad = None for every parameter p in model.parameters(). I am new to PyTorch and deep learning, and I don’t understand what is happening. It would be great to get some direction on this!

		x_sent = self.embeddings(input_sent)
		x_char = self.embeddings(input_char)

		h_sent, (h_t_sent, c_t_sent) = self.lstm_sent(x_sent,(h0_sent, c0_sent))

		linear_sent = self.linear_sent(h_sent)

		linear_char = self.linear_char(x_char)

		linear_concat = torch.cat((linear_sent, linear_char), 1)

		linear_sum = torch.sum(linear_concat, dim=1)
		
		dense_out = F.relu(linear_sum)

		labels_correct = self.linear_labels_correct(dense_out)

		labels_incorrect = self.linear_labels_incorrect(dense_out)

		labels_stacked = torch.stack([labels_correct, labels_incorrect], dim = -1)

		labels_softmax = torch.nn.functional.softmax(labels_stacked, dim = -1)

		labels_correct_softmax, labels_incorrect_softmax = torch.unbind(labels_softmax, dim=-1)

		labels_correct_pred = Variable(labels_correct_softmax, requires_grad=True)

		return labels_correct_pred

This is the init function for the network

	def __init__(self, obj, vocab_limit, embedding_dim, hidden_dim, num_layers, batch_size):
		"""Build the embedding layer, the bidirectional LSTM and the linear heads.

		obj must expose train_weights_matrix, which is handed to
		create_emb_layer. vocab_limit and batch_size are accepted but not
		used in this constructor.
		"""
		super(Model, self).__init__()

		# Width shared by both projection layers and consumed by the label heads.
		projection_dim = 20

		self.num_layers = num_layers
		self.hidden_dim = hidden_dim
		self.embedding_dim = embedding_dim

		# create_emb_layer returns a pair; only the layer itself is kept.
		embedding_layer, _num_embeddings = create_emb_layer(obj.train_weights_matrix, 0, False)
		self.embeddings = embedding_layer

		self.lstm_sent = nn.LSTM(
			input_size=embedding_dim,
			hidden_size=hidden_dim,
			num_layers=num_layers,
			batch_first=True,
			bidirectional=True,
		)

		# A bidirectional LSTM emits forward and backward features side by
		# side, hence twice the hidden size on the input of this projection.
		self.linear_sent = nn.Linear(2 * hidden_dim, projection_dim)
		self.linear_char = nn.Linear(embedding_dim, projection_dim)

		# NOTE(review): label_size is not a parameter of this constructor;
		# presumably it is a module-level name defined elsewhere - confirm.
		self.linear_labels_correct = nn.Linear(projection_dim, label_size)
		self.linear_labels_incorrect = nn.Linear(projection_dim, label_size)