I am trying to create a custom layer with a custom gradient.
This layer should effectively act like two normal nn.Linear() layers in parallel, one of which is slightly modified.
Since I don't want to reinvent the wheel by defining the weights and biases of the contained linear layers myself, I'm wondering how I can use these two existing nn.Linear() layers directly in my custom backward() function.
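For reference, without any custom gradient, the forward behaviour I'm after is just this (a minimal sketch, with a made-up class name, two independent nn.Linear() layers applied to the same input, where autograd handles everything on its own):

from torch import nn

class TwoLinearsInParallel(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.main_layer = nn.Linear(num_inputs, num_outputs)
        self.aux_layer = nn.Linear(num_inputs, num_outputs)

    def forward(self, input):
        # Two independent linear transformations of the same input
        return self.main_layer(input), self.aux_layer(input)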
This is what I have tried, but it doesn't even produce an error message; it just crashes the Python process:
import torch
from torch import nn, autograd

class CustomFunction(autograd.Function):
    @staticmethod
    def forward(ctx, input, main_layer, aux_layer):
        # Both feed-forward portions are just the result of applying the respective layer to the input
        output_main = main_layer(input)
        output_aux = aux_layer(input)
        ctx.save_for_backward(input, output_main, output_aux)
        return output_main, output_aux

    @staticmethod
    def backward(ctx, grad_main, grad_aux):
        input, output_main, output_aux = ctx.saved_tensors
        grad_input = None
        if ctx.needs_input_grad[0]:
            # Channel the gradient of the main output into the input
            # TODO: THIS IS THE PART THAT DOESN'T WORK
            # (it's supposed to just call the backward() function of the main_layer)
            grad_input = output_main.backward(grad_main)
        # (custom code will go here later to use grad_aux, once the bug above is resolved)
        return grad_input, None, None
class CustomModule(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.main_layer = nn.Linear(num_inputs, num_outputs)
        self.aux_layer = nn.Linear(num_inputs, num_outputs)

    def forward(self, input):
        output_main, output_aux = CustomFunction.apply(input, self.main_layer, self.aux_layer)
        return output_main, output_aux
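For completeness, this is roughly how I'm exercising the module when the crash happens (a minimal sketch with made-up sizes and a dummy loss; the problem shows up once the backward pass reaches the marked line in CustomFunction.backward()):

import torch

module = CustomModule(num_inputs=4, num_outputs=3)
x = torch.randn(2, 4, requires_grad=True)

output_main, output_aux = module(x)

# Any dummy loss will do; the crash happens during loss.backward()
loss = output_main.sum() + output_aux.sum()
loss.backward()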