Gradients for User Function Not Being Calculated

Hi all,

I am trying to write a custom autograd function that uses CuPy to interface with PyTorch, following the NumPy Extensions Tutorial.

I have checked that the output of the convolution is correct. However, the gradient for the input is never calculated: input.grad stays None after calling backward(). I assume I have overlooked something.

My code is here:

import torch
import cupy as cp
import sigpy
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
from torch.autograd import Function


class ExampleFunction(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        # Save the raw tensors for backward, then hand zero-copy CuPy views to SigPy
        ctx.save_for_backward(input, filter, bias)
        input, filter, bias = sigpy.from_pytorch(input), sigpy.from_pytorch(filter), sigpy.from_pytorch(bias)

        # 'valid' convolution with a doubly flipped filter, i.e. cross-correlation
        result = sigpy.convolve(input, cp.flip(cp.flip(filter, axis=0), axis=1), mode='valid')
        result = cp.add(result, bias)
        return sigpy.to_pytorch(result, requires_grad=True)

    @staticmethod
    def backward(ctx, grad_output):
        input, filter, bias = ctx.saved_tensors
        grad_output, input, filter, bias = sigpy.from_pytorch(grad_output), sigpy.from_pytorch(input), sigpy.from_pytorch(filter), sigpy.from_pytorch(bias)
        # Standard convolution gradients: full-mode convolution for the input,
        # valid-mode correlation for the filter, and a sum for the bias
        grad_bias = cp.sum(grad_output, keepdims=True)
        grad_input = sigpy.convolve(grad_output, filter, mode='full')
        grad_filter = sigpy.convolve(input, cp.flip(cp.flip(grad_output, axis=0), axis=1), mode='valid')

        grad_input = sigpy.to_pytorch(grad_input, requires_grad=True)
        grad_bias = sigpy.to_pytorch(grad_bias, requires_grad=True)
        grad_filter = sigpy.to_pytorch(grad_filter, requires_grad=True)

        return grad_input, grad_filter.to(torch.float), grad_bias.to(torch.float)


class Example(Module):
    def __init__(self, filter_width, filter_height):
        super(Example, self).__init__()
        self.filter = Parameter(torch.randn(filter_width, filter_height))
        self.bias = Parameter(torch.randn(1, 1))

    def forward(self, input):
        return ExampleFunction.apply(input, self.filter, self.bias)
    
    
    
dev = 'cuda:0' if torch.cuda.is_available() else 'cpu'

module = Example(3, 3).to(dev)
print("Filter and bias: ", list(module.parameters()))
input = torch.randn(10, 10, requires_grad=True).cuda()
output = module(input)
print("Output from the convolution2: ", output)
rand = torch.randn(8, 8).cuda()
output.backward(rand)
print("Gradient for the input map: ", input.grad)

This outputs the following:

Filter and bias: [Parameter containing:
tensor([[ 0.5030, -0.7362, -0.3410],
        [ 0.2329,  0.3117, -0.3578],
        [ 0.2351,  0.2211,  0.8780]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[0.6562]], device='cuda:0', requires_grad=True)]
Output from the convolution2: tensor([[ 0.1825,  2.5443, -0.1493,  1.2438,  2.3477,  3.8454,  1.6594, -0.8285],
        [-0.0698,  1.2464,  2.1476, -1.8530,  0.5620,  2.8346,  2.4183,  0.1029],
        [ 1.9444,  0.4215,  1.8121, -0.5721, -1.5620, -0.5422,  0.8763,  2.5569],
        [-0.0357,  2.7819,  0.8434,  2.2674,  1.8015,  0.2055, -1.6885,  1.8099],
        [ 0.9745, -0.5870,  1.4823,  2.2904,  3.4809, -1.1498,  1.9393,  0.4010],
        [ 1.2713,  1.5730,  0.6142,  0.4776,  0.6151,  0.0946, -0.1609,  3.4696],
        [-1.2004, -0.1432,  0.6048,  0.1454, -0.0348,  1.5170,  3.0961, -1.0933],
        [ 1.4833, -1.0142, -0.7154,  0.7169, -2.2208,  1.5966,  0.0809, -1.2016]],
       device='cuda:0', grad_fn=<ExampleFunctionBackward>)
Gradient for the input map: None

Can someone point out why this is not working?

Cheers!

Could you try to create the input with the device argument? You are currently creating a non-leaf tensor with the .cuda() operation, and autograd only populates the .grad attribute of leaf tensors:

input = torch.randn(10, 10, requires_grad=True, device='cuda')
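If it helps, here is a minimal sketch of the difference (the tensor names a and b are mine, not from your post): calling .cuda() on a CPU leaf returns a new, non-leaf tensor, so its .grad stays None, while a tensor created directly on the GPU is a leaf and accumulates a gradient.

import torch

# Non-leaf: .cuda() derives a new GPU tensor from the CPU leaf,
# so backward() will not fill its .grad attribute.
a = torch.randn(4, 4, requires_grad=True).cuda()
print(a.is_leaf)  # False

# Leaf: created directly on the GPU, so .grad is populated.
b = torch.randn(4, 4, requires_grad=True, device='cuda')
print(b.is_leaf)  # True

(b * 2).sum().backward()
print(b.grad)  # a 4x4 tensor filled with 2s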