AssertionError: Embedding doesn't compute the gradient w.r.t. the indices, RuntimeError: element 0 of variables does not require grad and does not have a grad_fn

I'm trying to feed my decoder's outputs back into itself as the next inputs during minibatch training:

        for di in range(max_target_length):
            decoder_output, decoder_hidden, decoder_attn = decoder(
                decoder_input, decoder_hidden, encoder_outputs)

            # Use the decoder's own output as its next input:
            # pick the most likely token at each batch position
            topv, topi = decoder_output.data.topk(1)
            ni = topi[:, 0]  # shape: (batch_size,), e.g. torch.Size([128])
            decoder_input = Variable(ni, requires_grad=True)
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
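For reference, decoder_input going into the first iteration is a batch of start-of-sequence tokens, roughly like this (SOS_token and batch_size are names from my setup):

    decoder_input = Variable(torch.LongTensor([SOS_token] * batch_size))
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input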

When requires_grad=True I get:

    AssertionError                            Traceback (most recent call last)
    in <module>()
         42     input_batches, input_lengths, target_batches, target_lengths,
         43     encoder, decoder,
    ---> 44     encoder_optimizer, decoder_optimizer, criterion, clip
         45 )
         46

    in train(input_batches, input_lengths, target_batches, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, clip, max_length, teacher_forcing_ratio)
         47
         48     decoder_output, decoder_hidden, decoder_attn = decoder(
    ---> 49         decoder_input, decoder_hidden, encoder_outputs)
         50
         51     # Use decoder's own output as next input

    /home/carson/.local/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
        323         for hook in self._forward_pre_hooks.values():
        324             hook(self, input)
    --> 325         result = self.forward(*input, **kwargs)
        326         for hook in self._forward_hooks.values():
        327             hook_result = hook(self, input, result)

    in forward(self, input_seq, last_hidden, encoder_outputs)
         27         batch_size = input_seq.size(0)
         28         #hidden = self.hidden0.repeat(1, batch_size, 1)
    ---> 29         embedded = self.embedding(input_seq)
         30         embedded = self.embedding_dropout(embedded)
         31         embedded = embedded.view(1, batch_size, self.hidden_size) # S=1 x B x N, view is like reshape()

    /home/carson/.local/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
        323         for hook in self._forward_pre_hooks.values():
        324             hook(self, input)
    --> 325         result = self.forward(*input, **kwargs)
        326         for hook in self._forward_hooks.values():
        327             hook_result = hook(self, input, result)

    /home/carson/.local/lib/python3.5/site-packages/torch/nn/modules/sparse.py in forward(self, input)
        101             input, self.weight,
        102             padding_idx, self.max_norm, self.norm_type,
    --> 103             self.scale_grad_by_freq, self.sparse
        104         )
        105

    /home/carson/.local/lib/python3.5/site-packages/torch/nn/_functions/thnn/sparse.py in forward(cls, ctx, indices, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
         38
         39         assert indices.dim() <= 2
    ---> 40         assert not ctx.needs_input_grad[0], "Embedding doesn't " \
         41             "compute the gradient w.r.t. the indices"
         42

    AssertionError: Embedding doesn't compute the gradient w.r.t. the indices
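If I read the assert right, the embedding lookup is discrete indexing, so there is simply no gradient w.r.t. the integer indices to compute, and any index Variable with requires_grad=True trips it. A minimal sketch that reproduces the same assert (0.3-style Variable API):

    import torch
    from torch.autograd import Variable

    emb = torch.nn.Embedding(10, 4)
    idx = Variable(torch.LongTensor([1, 2, 3]), requires_grad=True)
    emb(idx)  # raises: Embedding doesn't compute the gradient w.r.t. the indices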

When requires_grad=False I get:

    RuntimeError                              Traceback (most recent call last)
    in <module>()
         42     input_batches, input_lengths, target_batches, target_lengths,
         43     encoder, decoder,
    ---> 44     encoder_optimizer, decoder_optimizer, criterion, clip
         45 )
         46

    in train(input_batches, input_lengths, target_batches, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, clip, max_length, teacher_forcing_ratio)
         71     )
         72
    ---> 73     loss.backward()
         74
         75     # Clip gradient norms

    /home/carson/.local/lib/python3.5/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
        165             Variable.
        166         """
    --> 167         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
        168
        169     def register_hook(self, hook):

    /home/carson/.local/lib/python3.5/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
         97
         98     Variable._execution_engine.run_backward(
    ---> 99         variables, grad_variables, retain_graph)
        100
        101

    RuntimeError: element 0 of variables does not require grad and does not have a grad_fn
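My guess is that taking topk on decoder_output.data detaches the sampled token, which should be fine for the next input, but it means the loss has to be computed from the live decoder_output of each step; if the loss is built from anything downstream of .data, nothing in it requires grad. A sketch of what I think the loop should look like (untested; all_decoder_outputs is just my name for collecting the per-step outputs, and decoder.output_size is how my decoder exposes the vocab size):

    # collect the live (non-detached) outputs so the loss keeps a graph
    all_decoder_outputs = Variable(torch.zeros(
        max_target_length, batch_size, decoder.output_size))
    if use_cuda:
        all_decoder_outputs = all_decoder_outputs.cuda()

    for di in range(max_target_length):
        decoder_output, decoder_hidden, decoder_attn = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        all_decoder_outputs[di] = decoder_output  # graph-carrying, used for the loss

        # the detached argmax is only ever used as the next input token
        topv, topi = decoder_output.data.topk(1)
        decoder_input = Variable(topi[:, 0])  # plain LongTensor, no requires_grad

The loss would then come from all_decoder_outputs against target_batches, so the gradient flows through each step's decoder_output rather than through the sampled indices. Does that sound right?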

Thanks for any help!
