import torch as th
from torch.autograd import Variable

v1 = Variable(th.randn(10), requires_grad=True)
m = v1 > 0.5          # comparison on a Variable yields a ByteTensor mask Variable
w = v1[m]             # masked indexing with that mask Variable
w.mean().backward()
This raises a NotImplementedError:
Traceback (most recent call last):
  File "/home/nrahaman/.PyCharm2016.1/config/scratches/scratch_100.py", line 10, in <module>
    w.mean().backward()
  File "/home/nrahaman/miniconda2/envs/thtf/lib/python3.6/site-packages/torch/autograd/variable.py", line 146, in backward
    self._execution_engine.run_backward((self,), (gradient,), retain_variables)
  File "/home/nrahaman/miniconda2/envs/thtf/lib/python3.6/site-packages/torch/autograd/function.py", line 137, in backward
    raise NotImplementedError
NotImplementedError
This can be worked around with gather and arange, but I’m still curious if it’s a me-problem.
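Roughly, the workaround builds an explicit integer index with arange and selects with gather, whose backward is implemented. A minimal sketch, assuming a 1-D tensor and a mask that selects at least one element:

import torch as th
from torch.autograd import Variable

v1 = Variable(th.randn(10), requires_grad=True)
mask = v1.data > 0.5                    # plain ByteTensor mask
idx = th.arange(0, 10).long()[mask]     # integer indices of the kept entries
w = v1.gather(0, Variable(idx))         # differentiable selection
w.mean().backward()                     # no NotImplementedError here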
So apparently, the gradient is still computed, even when the backward function throws the NotImplementedError.
import torch as th
from torch.autograd import Variable
v = Variable(th.randn(10), requires_grad=True)
m = v > 0.5
w = v[m]
try:
    w.mean().backward()
except NotImplementedError:
    # raise
    pass

print(m)
print(v.grad.data)  # populated even though backward raised
import torch
import torch.nn as nn


class Multipler(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    def forward(self, input):
        """
        In the forward pass we receive a Tensor containing the input and return a
        Tensor containing the output. You can cache arbitrary Tensors for use in the
        backward pass using the save_for_backward method.
        """
        self.save_for_backward(input)
        output = input * 10000
        return output

    def backward(self, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        return grad_input
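(For context, Multipler above is written in the legacy instance-based Function style, which, as far as I recall, is invoked by instantiating the class rather than through .apply:

scaled = Multipler()(some_input)  # legacy-style call; some_input is a placeholder

The normalize module below instead calls Multipler.apply, which belongs to the newer static-method interface, so the two styles are being mixed here.)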
class normalize(torch.nn.Module):
    def __init__(self):
        super(normalize, self).__init__()
        self.Softmax_layer1 = nn.Softmax2d()
        self.Softmax_layer2 = nn.Softmax2d()

    def forward(self, inputs):
        output = self.Softmax_layer1(Multipler.apply(inputs))
        output = self.Softmax_layer2(Multipler.apply(output))
        return output
But when I delete the *10000 in the forward function of the Multipler class, there is no such error. The mismatch between the forward and backward passes is exactly what I want; that is my design. How can I solve this problem for my case?
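If the forward/backward mismatch is intentional and it should work through the Multipler.apply call used in normalize, one option is to port the function to the ctx-based static-method API. A sketch, assuming a PyTorch version where Function.apply expects static forward/backward methods:

import torch

class Multipler(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        # intentionally scaled forward pass
        return input * 10000

    @staticmethod
    def backward(ctx, grad_output):
        # deliberately NOT undoing the forward scaling: the
        # forward/backward mismatch is the stated design, so the
        # upstream gradient is passed through unchanged
        return grad_output.clone()

The call sites in normalize stay exactly as written (Multipler.apply(inputs)), and the asymmetry between forward and backward is preserved.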