Legacy autograd function with non-static forward method is deprecated and will be removed in 1.3

Thank you. Your updated code worked and solved the static-method runtime error.

Hey guys, I met the same problem.
  File "D:\python3.8.1Base Interpreter\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "D:\workplace\srtp深度学习\faster-rcnn-pytorch-master\nets\classifier.py", line 159, in forward
    return self.RoI(x, rois)
  File "D:\python3.8.1Base Interpreter\lib\site-packages\torch\autograd\function.py", line 144, in __call__
    raise RuntimeError(
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)

and when I try to add "@staticmethod" before the forward function, it shows
Traceback (most recent call last):
  File "D:/workplace/srtp深度学习/faster-rcnn-pytorch-master/predict.py", line 17, in <module>
    r_image = frcnn.detect_image(image)
  File "D:\workplace\srtp深度学习\faster-rcnn-pytorch-master\frcnn.py", line 97, in detect_image
    roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
  File "D:\python3.8.1Base Interpreter\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'x'

and I don't know where to use ".apply".

Getting a similar issue. Can anyone help?

  File "finetune.py", line 61, in <module>
    output = paseplus(waveform)
  File "/data/env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "finetune.py", line 30, in forward
    x = self.pase(X.cuda(),device="cuda")     # x size will be (1, 256, 625), which are 625 frames of 256 dims each
  File "/data/env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/data/aditya/pase/pase/models/frontend.py", line 258, in forward
    h, _ = self.rnn(h)
  File "/data/env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/data/env/lib/python3.6/site-packages/torchqrnn/qrnn.py", line 164, in forward
    input, hn = layer(input, None if hidden is None else hidden[i])
  File "/data/env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/data/env/lib/python3.6/site-packages/torchqrnn/qrnn.py", line 99, in forward
    C = ForgetMult()(F, Z, hidden, use_cuda=self.use_cuda)
  File "/data/env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/data/env/lib/python3.6/site-packages/torchqrnn/forget_mult.py", line 178, in forward
    if hidden_init is None: return GPUForgetMult()(f, x) if use_cuda else CPUForgetMult()(f, x)
  File "/data/env/lib/python3.6/site-packages/torch/autograd/function.py", line 149, in __call__
    "Legacy autograd function with non-static forward method is deprecated. "
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)

The error message points to the usage of a deprecated autograd function and provides a workaround:

RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)
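In short, the new-style pattern looks roughly like this (a minimal ReLU-style sketch in the spirit of the linked example, not an exact copy of it): forward and backward are static methods, state is stored on the ctx object instead of on self, and the function is invoked via .apply rather than by instantiating it.

import torch

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # Save what backward needs on the ctx object instead of on self
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

# Call via .apply on the class, never by instantiating the Function
x = torch.randn(4, requires_grad=True)
y = MyReLU.apply(x)
y.sum().backward()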

Yes, I have added the static method to remove it, but that's not working.

Could you post an executable code snippet to reproduce this issue, please?

import torch.optim as optim
import os
from pase.models.frontend import wf_builder
import numpy as np
import pickle
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchaudio
class Pase(nn.Module):

    def __init__(self):
        super().__init__()
        self.pase = wf_builder('cfg/frontend/PASE+.cfg').eval().cuda()
        self.pase.load_pretrained('FE_e199.ckpt', load_last=True, verbose=True)

        self.fc1 = nn.Linear(256*16 , 512)   
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 14)

        #self.batchNorm = nn.BatchNorm1d(256, affine=False)  # since it has 256 dimensions
        self.dropout1 = nn.Dropout2d(p = 0.5)
        self.dropout2 = nn.Dropout2d(p = 0.3)


    def forward(self, X):
        # Now we can forward waveforms as Torch tensors
        # x = torch.randn(1, 1, 100000)  # example with random noise to check shape
        x = self.pase(X.cuda(),device="cuda")     # x size will be (1, 256, 625), which are 625 frames of 256 dims each
        
        #Padding with 0's Max size = 512
        output = torch.zeros(1, 256, 512, device=x.device)  # keep the padding buffer on the same device as x
        num_frames = x.shape[-1]
        output[:, :, :num_frames] = x
        pooled = F.avg_pool2d(output, kernel_size=(1,32))

        print(pooled.shape)

        #passing to linear model
        x = self.dropout1(F.relu(self.fc1(pooled)))
        x = self.dropout2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        
        return x

paseplus = Pase().cuda()
print('-----Model------')
print(paseplus) 
print()

#Testing on a random sound file
waveform, sample_rate = torchaudio.load('/data/aditya/pase/data_tts_8k/as/ASF001-AS-ST01U3.wav')
print('Shape of waveform', np.shape(waveform))
print("Sample rate of waveform: {}".format(sample_rate))

waveform = torch.unsqueeze(waveform, 0)

output = paseplus(waveform)
print(output)   #Input after passing through the model
print(output.shape)

Here is the snippet. I am using the PASE+ embeddings and passing them forward.

@ptrblck have you found anything?

The original error seems to be raised by GPUForgetMult()(f, x) or CPUForgetMult()(f, x), which seem to be legacy Autograd functions.
In your current code snippet you are not using custom autograd.Functions, but an nn.Module.
If the error is still raised in the code snippet, I guess that wf_builder might create the autograd.Function and I would need to see the code to further debug.

@ptrblck, for every iteration it generates this "Legacy autograd" warning. Is there any way to ignore the warnings so they are not printed while running the code?

You could try to disable all Python warnings globally.
I would rather recommend fixing the issue and using the new-style autograd.Function, as the warning will become an error, if that's not already the case in the latest release. :wink:
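If you just need to silence it in the meantime, something along these lines with Python's standard warnings module should work (the message filter matches the start of the warning text quoted above):

import warnings

# Option 1: disable all Python warnings globally
warnings.simplefilter("ignore")

# Option 2: only silence this particular deprecation message
warnings.filterwarnings("ignore", message="Legacy autograd function")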


Did you find a solution? PASE+ gives me the same error.

OK, I got it. I just modified GPUForgetMult in site-packages/torchqrnn/forget_mult.py:

class GPUForgetMult(torch.autograd.Function):
    def __init__(self):
        super(GPUForgetMult, self).__init__()
    @staticmethod
    def forward(self, f, x, hidden_init=None):
        self.configured_gpus = {}
        self.ptx = None
        if self.ptx is None:
            program = Program(kernel, 'recurrent_forget_mult.cu')
            self.ptx = program.compile()

        if torch.cuda.current_device() not in self.configured_gpus:
            m = function.Module()
            m.load(bytes(self.ptx.encode()))

            self.forget_mult = m.get_function('recurrent_forget_mult')
            self.bwd_forget_mult = m.get_function('bwd_recurrent_forget_mult')

            Stream = namedtuple('Stream', ['ptr'])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            self.configured_gpus[torch.cuda.current_device()] = (self.forget_mult, self.bwd_forget_mult, self.stream)

        self.forget_mult, self.bwd_forget_mult, self.stream = self.configured_gpus[torch.cuda.current_device()]
#        self.compile()
        seq_size, batch_size, hidden_size = f.size()
        result = f.new(seq_size + 1, batch_size, hidden_size)
        # We only zero the result array (result[0]) if we don't set a hidden initial state
        # All other values (result[1:]) are overwritten by default
        if hidden_init is not None: result[0, :, :] = hidden_init
        else: result = result.zero_()
        ###
        grid_hidden_size = min(hidden_size, 512)
        grid = (math.ceil(hidden_size / grid_hidden_size), batch_size)
        self.forget_mult(grid=grid, block=(grid_hidden_size, 1), args=[result.data_ptr(), f.data_ptr(), x.data_ptr(), seq_size, batch_size, hidden_size], stream=self.stream)
        self.save_for_backward(f, x, hidden_init)
        self.result = result
        return result[1:, :, :]
    @staticmethod
    def backward(self, grad_h):
        self.configured_gpus = {}
        self.ptx = None
        if self.ptx is None:
            program = Program(kernel, 'recurrent_forget_mult.cu')
            self.ptx = program.compile()

        if torch.cuda.current_device() not in self.configured_gpus:
            m = function.Module()
            m.load(bytes(self.ptx.encode()))

            self.forget_mult = m.get_function('recurrent_forget_mult')
            self.bwd_forget_mult = m.get_function('bwd_recurrent_forget_mult')

            Stream = namedtuple('Stream', ['ptr'])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            self.configured_gpus[torch.cuda.current_device()] = (self.forget_mult, self.bwd_forget_mult, self.stream)

        self.forget_mult, self.bwd_forget_mult, self.stream = self.configured_gpus[torch.cuda.current_device()]
#        self.compile()
        f, x, hidden_init = self.saved_tensors
        h = self.result
        ###
        seq_size, batch_size, hidden_size = f.size()
        # Zeroing is not necessary as these will be overwritten
        grad_f = f.new(*f.size())
        grad_x = f.new(*f.size())
        grad_h_init = f.new(batch_size, hidden_size)
        ###
        grid_hidden_size = min(hidden_size, 512)
        grid = (math.ceil(hidden_size / grid_hidden_size), batch_size)
        self.bwd_forget_mult(grid=grid, block=(grid_hidden_size, 1), args=[h.data_ptr(), f.data_ptr(), x.data_ptr(), grad_h.data_ptr(), grad_f.data_ptr(), grad_x.data_ptr(), grad_h_init.data_ptr(), seq_size, batch_size, hidden_size], stream=self.stream)
        ###
        if hidden_init is not None:
            return grad_f, grad_x, grad_h_init
        return grad_f, grad_x

It could have been done in a cleaner way, though.
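For reference, a cleaner new-style skeleton would drop the __init__ entirely and keep everything on ctx. The sketch below illustrates that pattern with a plain-PyTorch implementation of the forget-mult recurrence h_t = f_t * x_t + (1 - f_t) * h_{t-1}; it is not the CUDA kernel from torchqrnn, so treat it only as an illustration of the static forward/backward structure, not a drop-in replacement.

import torch

class ForgetMultFn(torch.autograd.Function):
    # New-style pattern: no __init__, static forward/backward, state stored on ctx.
    @staticmethod
    def forward(ctx, f, x, hidden_init=None):
        seq_size, batch_size, hidden_size = f.size()
        result = f.new_zeros(seq_size + 1, batch_size, hidden_size)
        if hidden_init is not None:
            result[0] = hidden_init
        for t in range(seq_size):
            # h_t = f_t * x_t + (1 - f_t) * h_{t-1}
            result[t + 1] = f[t] * x[t] + (1 - f[t]) * result[t]
        ctx.save_for_backward(f, x)
        ctx.result = result
        ctx.has_hidden_init = hidden_init is not None
        return result[1:]

    @staticmethod
    def backward(ctx, grad_h):
        f, x = ctx.saved_tensors
        result = ctx.result
        grad_f = torch.empty_like(f)
        grad_x = torch.empty_like(x)
        running = torch.zeros_like(grad_h[0])
        for t in reversed(range(f.size(0))):
            running = running + grad_h[t]             # total gradient reaching h_t
            grad_f[t] = running * (x[t] - result[t])  # result[t] is h_{t-1}
            grad_x[t] = running * f[t]
            running = running * (1 - f[t])            # gradient flowing into h_{t-1}
        grad_hidden = running if ctx.has_hidden_init else None
        # One gradient per forward input: f, x, hidden_init
        return grad_f, grad_x, grad_hidden

# Called via .apply instead of ForgetMult()(f, x):
# out = ForgetMultFn.apply(f, x, hidden)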

Hi all,
Since this issue is already open, I would like to ask my question here. I am getting the same error that is mentioned in the title of this topic. My network is this (it is basically a gradient reversal layer):

class GradReverse(Function):
    def __init__(self, lambd):
        self.lambd = lambd
    #@staticmethod
    def forward(self, x):
        return x.view_as(x)
    #@staticmethod
    def backward(self, grad_output):
        return (grad_output * -self.lambd)

I got the error and followed the example as shown in the link, and I tried writing in the modified format as given below:

class GradReverse(Function):
    def __init__(self, lambd):
        self.lambd = lambd
    @staticmethod
    def forward(ctx, x):
        result = x.view_as(x)
        ctx.save_for_backward(result)
        return result
    @staticmethod
    def backward(ctx, grad_output):
        return (grad_output * -self.lambd)

Then I get the error that self is not defined. Can someone please help me restructure my old layer according to the new method? I have been stuck with this for quite a while. :confused:
Any help would be greatly appreciated.
Thanks,
Megh

The example in this post might help:

It seems in the new style you would remove the __init__() entirely, and move whatever arguments were in your __init__() into the forward() function.

Something like this, but I am not entirely sure. I am also looking for a way to modify some old PyTorch code:

@staticmethod
def forward(ctx, x, lambd):
     ctx.lambd = lambd
     result = x.view_as(x)
     return result

@staticmethod
def backward(ctx, grad_output):
     return (grad_output * -ctx.lambd), None
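Spelled out as a complete class (still a sketch along the same lines, not verified against the original model), it would look like this; note that the function is called through .apply on the class rather than by instantiating it, which also answers the earlier question of where .apply goes:

import torch
from torch.autograd import Function

class GradReverse(Function):
    # Gradient reversal: identity in the forward pass, gradient scaled by -lambd in the backward pass.
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient per forward input; lambd is a plain float, so it gets None.
        return grad_output * -ctx.lambd, None

# Usage: call through .apply instead of GradReverse(lambd)(x)
x = torch.randn(3, requires_grad=True)
y = GradReverse.apply(x, 0.5)
y.sum().backward()
print(x.grad)  # every entry is -0.5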

Thanks. I got my answer here: Custom autograd.Function: must it be static?


This is also correct. Thank you.

I’m trying to run the code below and getting errors:

The code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

import cv2
from torch.autograd import Variable
import torch
from data import BaseTransform, VOC_CLASSES as labelmap
from ssd import build_ssd
import imageio

def detect(frame, net, transform):
    height, width = frame.shape[:2]
    frame_t = transform(frame)[0]
    x = torch.from_numpy(frame_t).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])
    # detections = [batch, number of classes, number of occurrences, (score, x0, y0, x1, y1)]
    for i in range(detections.size(1)):
        j = 0
        while detections[0, i, j, 0] >= 0.6:
            pt = (detections[0, i, j, 1:] * scale).numpy()
            cv2.rectangle(frame, (int(pt[0]), int(pt[1])), (int(pt[2]), int(pt[3])), (255, 0, 0), 2)
            cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
            j += 1
    return frame

net = build_ssd('test')
net.load_state_dict(torch.load('ssd300_mAP_77.43_v2.pth', map_location=lambda storage, loc: storage))

transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0))

reader = imageio.get_reader('video_teste01ssd.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.mp4', fps=fps)
for i, frame in enumerate(reader):
    frame = detect(frame, net.eval(), transform)
    writer.append_data(frame)
    print(i)
writer.close()


The error is:

RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)

Could someone help me, please?

I don’t see any custom autograd.Function implementation in the posted code snippet, so I assume the model or any other imported method uses it.
If you follow the link in the error message, you'll get some information on how this function should be implemented, and you would have to fix it accordingly.

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

Hi everyone,
my problem is similar, but none of these solutions resolved it.
This is my code:

import torch
import torch.nn as nn
from torch.nn.modules.module import Module
from torch.autograd import Function
import correlation_cuda

class CorrelationFunction(Function):

    def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        super(CorrelationFunction, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply
        # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1)
        
    
    def forward(self, input1, input2):
        self.save_for_backward(input1, input2)

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        return output
        
    
    def backward(self, grad_output):
        input1, input2 = self.saved_tensors

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        return grad_input1, grad_input2


class Correlation(nn.Module):
    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply
        
    
    def forward(self, input1, input2):

        result=CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2)
        #result=out.apply(input1, input2)

        return result

and this is how I call it:

corr = Correlation(pad_size=self.search_range, kernel_size=1,
                                max_displacement=self.search_range, stride1=1,
                                stride2=1, corr_multiply=1)
out_corr = corr(x1, x2_warp)

First I got [Legacy autograd function with non-static forward method is deprecated]. Then I added the @staticmethod decorator, but I got this error:

out_corr = self.corr(x1, x2_warp)
  File "/home/ubuntu/anaconda3/envs/new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'input2'

Then I added .apply:

out_corr = self.corr.apply(x1, x2_warp)

but I got this error:
out_corr = corr.apply(x1, x2_warp)
TypeError: apply() takes 2 positional arguments but 3 were given
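For what it's worth, the last error comes from calling .apply on the Correlation module instance: nn.Module.apply() takes a single function argument, hence "takes 2 positional arguments but 3 were given". With the new-style API, .apply belongs to the Function class itself, and the old constructor arguments become extra forward() inputs stored on ctx. An untested sketch based on the code posted above (keeping the correlation_cuda calls exactly as posted) might look like this:

import torch
from torch.autograd import Function
import correlation_cuda

class CorrelationFunction(Function):
    # New-style version: the former constructor arguments are passed to forward() and kept on ctx.
    @staticmethod
    def forward(ctx, input1, input2, pad_size, kernel_size, max_displacement,
                stride1, stride2, corr_multiply):
        ctx.save_for_backward(input1, input2)
        ctx.params = (pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)
        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()
            correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
                                     pad_size, kernel_size, max_displacement,
                                     stride1, stride2, corr_multiply)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors
        pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply = ctx.params
        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            grad_input1 = input1.new()
            grad_input2 = input2.new()
            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output,
                                      grad_input1, grad_input2,
                                      pad_size, kernel_size, max_displacement,
                                      stride1, stride2, corr_multiply)
        # One gradient per forward input; the integer hyperparameters get None.
        return grad_input1, grad_input2, None, None, None, None, None, None


class Correlation(torch.nn.Module):
    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0,
                 stride1=1, stride2=2, corr_multiply=1):
        super().__init__()
        self.params = (pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply)

    def forward(self, input1, input2):
        # Call .apply on the Function class, not on the module instance.
        return CorrelationFunction.apply(input1, input2, *self.params)

The Correlation module keeps the same constructor signature as before, so the call site corr = Correlation(pad_size=..., ...) followed by out_corr = corr(x1, x2_warp) should not need to change.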