Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method

HamidGadirov · November 7, 2022, 2:57pm

Hi, I am getting error messages after trying to update the neural network (for optical flow) for the current PyTorch version (1.13.0+cu117, previous implementation was for 1.1). The code is the following:

class CorrelationFunction(Function):
    """Legacy-style autograd Function wrapping the ``correlation_cuda`` extension.

    The correlation settings are stored on the instance in ``__init__`` and
    ``forward``/``backward`` are instance methods. This is the form that newer
    PyTorch releases reject with "Legacy autograd function with non-static
    forward method is deprecated".
    """

    def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        super(CorrelationFunction, self).__init__()
        print("in CorrelationFunction __init__")
        # Settings are kept on the instance so forward/backward can pass them to the extension.
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply
        # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1)

    # @staticmethod
    def forward(self, input1, input2): # self,
        # NOTE(review): the message below says "__init__" although this is forward --
        # looks like a copy-paste slip in the debug print.
        print("in CorrelationFunction __init__")
        # Keep both inputs so backward can read them from self.saved_tensors.
        self.save_for_backward(input1, input2)

        with torch.cuda.device_of(input1):
            # Empty tensors handed to the extension; presumably it resizes and
            # fills them (scratch buffers and result) -- confirm against correlation_cuda.
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        print("returning CorrelationFunction forward")
        return output

    # @staticmethod
    def backward(self, grad_output): # self, 
        input1, input2 = self.saved_tensors

        with torch.cuda.device_of(input1):
            # Same pattern as forward: empty tensors passed to the extension.
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        # One gradient per tensor input of forward.
        return grad_input1, grad_input2


class Correlation(Module):
    """Module wrapper that stores the correlation settings and runs CorrelationFunction in forward."""

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        print("in Correlation __init__")
        # Settings are only stored here; they are forwarded to CorrelationFunction on every call.
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply


    # @staticmethod
    def forward(self, input1, input2):
        print("in Correlation forward")

        # Legacy call pattern: build a Function instance from the settings, then
        # call that instance on the two inputs.
        result = CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2)

        return result

and I am calling Correlation like this:
out_corr_1 = Correlation(pad_size=self.search_range, kernel_size=1, max_displacement=self.search_range, stride1=1, stride2=1, corr_multiply=1)(feature_1, feature_2_warp)

However, there is an error:
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)

I have seen a related discussion (here), where it was suggested to “remove the comments before @staticmethod (# )” and then use “Class_name.apply(x)” by @ptrblck, but then I have another issue:
TypeError: apply() takes 2 positional arguments but 3 were given

Thanks in advance!

AlphaBetaGamma96 · November 7, 2022, 5:20pm

Hi @HamidGadirov,

Have you read the example in Automatic differentiation package - torch.autograd — PyTorch 2.1 documentation ?

The signature for torch.autograd.Function is now defined as,

class CustomFunc(torch.autograd.Function):
  """Skeleton of a new-style autograd Function: no constructor, static forward/backward."""

  # no need for the constructor (__init__) bit

  @staticmethod
  def forward(ctx, *args, **kwargs):
    # forward pass here
    ...

  @staticmethod
  def backward(ctx, *args, **kwargs):
    # backward pass here
    ...

You need to remove the __init__ method from the class (like I showed above), and then when you want to use the Function it needs to be defined as,

func = CustomFunc.apply(*args, **kwargs)

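Note that CustomFunc.apply is the callable entry point of the torch.autograd.Function; calling it as above runs the forward pass, so func holds the returned output (e.g. a Tensor) rather than a new callable.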

From this error message, check you’re not passing the self arg to the .apply() call. Also, I think that should be defined as ctx instead of self, but it could be arbitrary. See if it makes a difference.

HamidGadirov · November 7, 2022, 8:17pm

Thanks @AlphaBetaGamma96 for the reply. I reimplemented the code for PyTorch 1.13:


class CorrelationFunction(torch.autograd.Function):
    """New-style autograd Function wrapping the ``correlation_cuda`` extension.

    Call it as ``CorrelationFunction.apply(input1, input2, pad_size, ...)``.
    Because ``forward`` and ``backward`` are static, the correlation settings
    are carried from one to the other on ``ctx``.
    """

    @staticmethod
    def forward(ctx, input1, input2,
            pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        """Run the extension's forward kernel on ``input1``/``input2`` and return its output tensor."""
        print("in CorrelationFunction forward")
        ctx.save_for_backward(input1, input2)
        # There is no instance state in a static method, so the non-tensor
        # settings must be stashed on ctx for backward to reuse them.
        ctx.corr_params = (pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)

        with torch.cuda.device_of(input1):
            # Empty tensors handed to the extension; presumably it resizes and
            # fills them -- confirm against correlation_cuda.
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
                *ctx.corr_params)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Run the extension's backward kernel and return one gradient slot per forward argument."""
        input1, input2 = ctx.saved_tensors

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                *ctx.corr_params)

        # backward must return one value per argument given to forward: the two
        # tensor gradients, then None for each of the six non-tensor settings.
        return grad_input1, grad_input2, None, None, None, None, None, None

class Correlation(torch.autograd.Function):
    """Thin wrapper whose forward hands its arguments on to CorrelationFunction."""

    @staticmethod
    def forward(ctx, input1, input2, 
            pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 
        print("in Correlation forward")

        # NOTE(review): this calls the CorrelationFunction class itself rather than
        # CorrelationFunction.apply(...), so `result` is a CorrelationFunction
        # object, not a Tensor -- this matches the "must be Tensor, not
        # CorrelationFunction" error reported for this snippet.
        result = CorrelationFunction(input1, input2, pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)
        # result = CorrelationFunction(pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)(input1, input2)

        return result

However, now when I try to use the output of Correlation, e.g.:

out_corr_relu_1 = self.leakyRELU(out_corr_1)

there is an error:

TypeError: leaky_relu_(): argument 'input' (position 1) must be Tensor, not CorrelationFunction

AlphaBetaGamma96 · November 7, 2022, 8:22pm

Hi @HamidGadirov,

You need to check wherever you’ve defined a self.leakyRELU(out_corr_1). Because it seems that out_corr_1 is your custom autograd Function and not a Tensor. I assume this is within your correlation_cuda function?

HamidGadirov · November 7, 2022, 9:12pm

Even if I comment out correlation_cuda function, it is the same issue. The problem is that return type of out_corr_1 is CorrelationFunction, and I don’t know why. Converting to tensor doesn’t work either.

AlphaBetaGamma96 · November 7, 2022, 9:14pm

Can you share a minimal reproducible example? It seems you're using the custom function somewhere else. Did you remember to use the .apply method?

You can do something like this,

def func(*args, **kwargs):
  """Forward all arguments to ``CustomFunc.apply`` so it can be called like a plain function."""
  result = CustomFunc.apply(*args, **kwargs)
  return result

func(*args, **kwargs) #call function like a normal python function.

HamidGadirov · November 7, 2022, 9:31pm

I was using .apply() of course. Here is the minimal reproducible example (a combination of the PyTorch example and my code):

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        # Convolution stack: 1 input image channel -> 6 -> 16 channels, 5x5 square kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Affine layers (y = Wx + b); the 5*5 factor comes from the image dimension.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the output of the final linear layer for the image batch ``x``."""
        # Each conv stage: convolution -> ReLU -> max pooling over a 2x2 window.
        for conv in (self.conv1, self.conv2):
            activated = F.relu(conv(x))
            x = F.max_pool2d(activated, kernel_size=2)

        # Flatten every dimension except the batch dimension.
        x = x.flatten(start_dim=1)

        # The two hidden linear layers are followed by ReLU; the last one is not.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

class CorrelationFunction(torch.autograd.Function):
    """New-style autograd Function for the correlation op, with the CUDA kernel calls stubbed out.

    In this reproducible example the ``correlation_cuda`` calls are commented
    out, so ``forward`` returns an empty tensor and ``backward`` returns empty
    gradient tensors. Call it as ``CorrelationFunction.apply(...)``.
    """

    @staticmethod
    def forward(ctx, input1, input2,
            pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        """Save the inputs and settings on ``ctx`` and return the (stubbed) output tensor."""
        print("in CorrelationFunction forward")
        ctx.save_for_backward(input1, input2)
        # There is no instance state in a static method, so the non-tensor
        # settings are stashed on ctx for backward to reuse.
        ctx.corr_params = (pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)

        with torch.cuda.device_of(input1):
            # Empty tensors that would be handed to the extension (kept for the
            # commented-out kernel call below).
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            # correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
            #     *ctx.corr_params)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return one gradient slot per forward argument (tensor gradients are stubs here)."""
        input1, input2 = ctx.saved_tensors

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            # correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
            #     *ctx.corr_params)

        # backward must return one value per argument given to forward: the two
        # tensor gradients, then None for each of the six non-tensor settings.
        return grad_input1, grad_input2, None, None, None, None, None, None

class Correlation(torch.autograd.Function):
    """Thin wrapper whose forward hands its arguments on to CorrelationFunction."""

    @staticmethod
    def forward(ctx, input1, input2, 
            pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 
            # `self` refers to the object; it is not passed explicitly when calling - syntactic sugar!
        print("in Correlation forward")

        # NOTE(review): this calls the CorrelationFunction class itself rather than
        # CorrelationFunction.apply(...), so `result` is a CorrelationFunction
        # object, not a Tensor -- this matches the "must be Tensor, not
        # CorrelationFunction" error reported for this example.
        result = CorrelationFunction(input1, input2, pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)
        # result = CorrelationFunction(pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)(input1, input2)

        return result

# Driver for the reproducible example: build the inputs and run the correlation wrapper.
net = Net()
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.randn(1, 1, 32, 32)

# test
leakyRELU = nn.LeakyReLU(0.1, inplace=True)
# Eight positional arguments: the two tensors followed by the six correlation settings.
out_corr_1 = Correlation.apply(input, input, 4, 1, 4, 1, 1, 1)
# This is the call reported to raise "must be Tensor, not CorrelationFunction".
out_corr_relu_1 = leakyRELU(out_corr_1)

It throws the same "must be Tensor, not CorrelationFunction" error.

AlphaBetaGamma96 · November 7, 2022, 10:29pm

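You need to use .apply here too, i.e. call CorrelationFunction.apply(input1, input2, pad_size, ...) inside Correlation.forward, as CorrelationFunction is also a torch.autograd.Function. Calling the class directly only constructs a Function object instead of running its forward, which is why you're getting the error about the result not being a Tensor.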

HamidGadirov · November 7, 2022, 11:44pm

Thanks, the issue is solved now.