I am writing my own forward and backward function for a custom layer. Forward function runs alright but when I call loss.backward(), i get following error.
Traceback (most recent call last):
File “/usr/share/java/pycharm-community/helpers/pydev/pydevd.py”, line 1599, in
globals = debugger.run(setup[‘file’], None, None, is_module)
File “/usr/share/java/pycharm-community/helpers/pydev/pydevd.py”, line 1026, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File “/usr/share/java/pycharm-community/helpers/pydev/_pydev_imps/_pydev_execfile.py”, line 18, in execfile
exec(compile(contents+"\n", file, ‘exec’), glob, loc)
File “/home/usama/PycharmProjects/test/Maxout_test.py”, line 126, in
loss.backward()
File “/home/usama/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py”, line 156, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
File “/home/usama/anaconda3/lib/python3.6/site-packages/torch/autograd/init.py”, line 98, in backward
variables, grad_variables, retain_graph)
File “/home/usama/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py”, line 91, in apply
return self._forward_cls.backward(self, *args)
File “/home/usama/PycharmProjects/test/Maxout_test.py”, line 40, in backward
input[:, i * 4] = a0 * grad_output
File “/home/usama/anaconda3/lib/python3.6/site-packages/torch/tensor.py”, line 309, in mul
return self.mul(other)
TypeError: mul received an invalid combination of arguments - got (Variable), but expected one of:
- (int value)
didn’t match because some of the arguments have invalid types: (!Variable!) - (torch.cuda.ByteTensor other)
didn’t match because some of the arguments have invalid types: (!Variable!)
loss is calculated with two variables, which works fine with built-in modules of pytorch but gives error with my custom module. In case here is the code for my custom module.
class Maxout(Function):
# Note that both forward and backward are @staticmethods
@staticmethod
# bias is an optional argument
def forward(ctx, input):
x = input
kernels = x.shape[1] # to get how many kernels/output
feature_maps = int(kernels / 4)
out_shape = (x.shape[0], feature_maps, 4, x.shape[2], x.shape[3])
# print(out_shape)
x= x.view(out_shape)
# print(x.data.shape)
y, indices = torch.max(x[:, :, :], 2)
ctx.save_for_backward(input)
ctx.indices=indices
return y
# This function has only a single output, so it gets only one gradient
@staticmethod
def backward(ctx, grad_output):
input,indices= ctx.saved_tensors,ctx.indices
for i in range(indices.shape[1]):
a0 = indices[:, i] == 0
a1 = indices[:, i] == 1
a2 = indices[:, i] == 2
a3 = indices[:, i] == 3
input[:, i * 4] = a0 * grad_output
input[:, i * 4 + 1] = a1 * grad_output
input[:, i * 4 + 2] = a2 * grad_output
input[:, i * 4 + 3] = a3 * grad_output
return input