Legacy autograd function with non-static forward method is deprecated and will be removed in 1.3

Could you post an executable code snippet to reproduce this issue, please?

1 Like
import torch.optim as optim
import os
from pase.models.frontend import wf_builder
import numpy as np
import pickle
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
import torchaudio
class Pase(nn.Module):
    """Pretrained PASE+ front-end followed by a three-layer classifier head.

    The PASE+ features are zero-padded (or truncated) along the frame axis to
    ``MAX_FRAMES``, average-pooled over windows of ``POOL_WIDTH`` frames,
    flattened and passed through ``fc1 -> fc2 -> fc3`` (14 output values).
    """

    # Fixed frame budget and pooling window. 512 / 32 = 16 pooled frames, which
    # together with 256 feature channels gives the 256 * 16 inputs of ``fc1``.
    MAX_FRAMES = 512
    POOL_WIDTH = 32

    def __init__(self):
        super().__init__()
        self.pase = wf_builder('cfg/frontend/PASE+.cfg').eval().cuda()
        self.pase.load_pretrained('FE_e199.ckpt', load_last=True, verbose=True)

        self.fc1 = nn.Linear(256 * 16, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 14)

        # The head works on flattened (batch, features) activations, so plain
        # element-wise dropout is the appropriate layer (Dropout2d is meant for
        # channel-wise dropout on feature maps).
        self.dropout1 = nn.Dropout(p=0.5)
        self.dropout2 = nn.Dropout(p=0.3)

    def forward(self, X):
        """Encode waveforms with PASE+ and classify them.

        Args:
            X: waveform tensor accepted by the PASE+ front-end; it is moved to
                CUDA before encoding.

        Returns:
            Tensor of shape ``(batch, 14)`` with the output of ``fc3``.
        """
        # Per the original author's note the encoder returns
        # (batch, 256, num_frames) -- TODO confirm against the PASE+ docs.
        feats = self.pase(X.cuda(), device="cuda")

        # Pad with zeros up to MAX_FRAMES. The buffer is created on the same
        # device/dtype as the features so the linear layers (on CUDA) can
        # consume it; longer inputs are truncated instead of raising a shape
        # mismatch.
        batch_size, channels = feats.shape[0], feats.shape[1]
        num_frames = min(feats.shape[-1], self.MAX_FRAMES)
        padded = torch.zeros(
            batch_size, channels, self.MAX_FRAMES,
            dtype=feats.dtype, device=feats.device,
        )
        padded[:, :, :num_frames] = feats[:, :, :num_frames]

        # Average over non-overlapping windows along the frame axis:
        # (batch, 256, 512) -> (batch, 256, 16).
        pooled = F.avg_pool1d(padded, kernel_size=self.POOL_WIDTH)

        # nn.Linear acts on the last dimension only, so collapse channels and
        # pooled frames into one feature vector per sample before fc1.
        flat = pooled.reshape(batch_size, -1)

        hidden = self.dropout1(F.relu(self.fc1(flat)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        return self.fc3(hidden)

# Build the model on the GPU and print its layer summary.
paseplus = Pase().cuda()
print('-----Model------')
print(paseplus)
print()

# Smoke test: run a single audio file through the model.
wav_path = '/data/aditya/pase/data_tts_8k/as/ASF001-AS-ST01U3.wav'
waveform, sample_rate = torchaudio.load(wav_path)
print('Shape of waveform', np.shape(waveform))
print("Sample rate of waveform: {}".format(sample_rate))

# Add a leading dimension before handing the waveform to the model.
waveform = waveform.unsqueeze(0)

output = paseplus(waveform)
print(output)        # model output for the loaded waveform
print(output.shape)

Here is the snippet. I am using the PASE+ embeddings and passing them forward.

@ptrblck have you found anything?

The original error seems to be raised by GPUForgetMult()(f, x) or CPUForgetMult()(f, x), which seem to be legacy Autograd functions.
In your current code snippet you are not using custom autograd.Functions, but an nn.Module.
If the error is still raised in the code snippet, I guess that wf_builder might create the autograd.Function and I would need to see the code to further debug.

@ptrblck, it generates this "Legacy autograd" warning on every iteration. Is there a way to suppress these warnings so that they are not printed while the code runs?

You could try to disable all Python warnings globally.
I would rather recommend fixing the issue and using the new-style autograd.Functions, as the warning will become an error, if that’s not already the case in the latest release. :wink:

1 Like

Did you find a solution? PASE+ gives me the same error.

OK, I got it. I just modified GPUForgetMult in site-packages/torchqrnn/forget_mult.py:

class GPUForgetMult(torch.autograd.Function):
    """CUDA forget-mult recurrence as a new-style (static) autograd Function.

    Use ``GPUForgetMult.apply(f, x)`` or
    ``GPUForgetMult.apply(f, x, hidden_init)``; instances must not be called.
    """

    # Caches shared by every call: the kernel is compiled once, and the kernel
    # handles are loaded once per CUDA device, instead of on every forward and
    # backward pass.
    configured_gpus = {}  # device index -> (forward kernel, backward kernel)
    ptx = None            # compiled kernel, filled on first use

    @classmethod
    def _get_kernels(cls):
        """Return ``(forward_kernel, backward_kernel, stream)`` for the current device."""
        if cls.ptx is None:
            program = Program(kernel, 'recurrent_forget_mult.cu')
            cls.ptx = program.compile()

        device = torch.cuda.current_device()
        if device not in cls.configured_gpus:
            m = function.Module()
            m.load(bytes(cls.ptx.encode()))
            cls.configured_gpus[device] = (
                m.get_function('recurrent_forget_mult'),
                m.get_function('bwd_recurrent_forget_mult'),
            )
        forget_mult, bwd_forget_mult = cls.configured_gpus[device]

        # The stream is looked up on every call (not cached) so the kernel is
        # always launched on whatever stream is current at call time.
        Stream = namedtuple('Stream', ['ptr'])
        stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)
        return forget_mult, bwd_forget_mult, stream

    @staticmethod
    def forward(ctx, f, x, hidden_init=None):
        """Launch the forward kernel.

        Args:
            ctx: autograd context object.
            f: tensor whose ``size()`` is ``(seq_size, batch_size, hidden_size)``.
            x: tensor passed to the kernel alongside ``f``.
            hidden_init: optional initial hidden state written to ``result[0]``.

        Returns:
            ``result[1:]``, i.e. ``seq_size`` steps of hidden states.
        """
        forget_mult, _, stream = GPUForgetMult._get_kernels()

        seq_size, batch_size, hidden_size = f.size()
        result = f.new(seq_size + 1, batch_size, hidden_size)
        # We only zero the result array (result[0]) if we don't set a hidden initial state
        # All other values (result[1:]) are overwritten by default
        if hidden_init is not None:
            result[0, :, :] = hidden_init
        else:
            result = result.zero_()

        grid_hidden_size = min(hidden_size, 512)
        grid = (math.ceil(hidden_size / grid_hidden_size), batch_size)
        forget_mult(
            grid=grid,
            block=(grid_hidden_size, 1),
            args=[result.data_ptr(), f.data_ptr(), x.data_ptr(), seq_size, batch_size, hidden_size],
            stream=stream,
        )

        ctx.save_for_backward(f, x, hidden_init)
        # The full buffer (including the initial state row) is needed by the
        # backward kernel, so keep it on the context.
        ctx.result = result
        return result[1:, :, :]

    @staticmethod
    def backward(ctx, grad_h):
        """Launch the backward kernel.

        Returns:
            ``(grad_f, grad_x, grad_hidden_init)``; the last entry is ``None``
            when no ``hidden_init`` was given to ``forward``.
        """
        _, bwd_forget_mult, stream = GPUForgetMult._get_kernels()

        f, x, hidden_init = ctx.saved_tensors
        h = ctx.result

        seq_size, batch_size, hidden_size = f.size()
        # Zeroing is not necessary as these will be overwritten
        grad_f = f.new(*f.size())
        grad_x = f.new(*f.size())
        grad_h_init = f.new(batch_size, hidden_size)

        grid_hidden_size = min(hidden_size, 512)
        grid = (math.ceil(hidden_size / grid_hidden_size), batch_size)
        bwd_forget_mult(
            grid=grid,
            block=(grid_hidden_size, 1),
            args=[h.data_ptr(), f.data_ptr(), x.data_ptr(), grad_h.data_ptr(), grad_f.data_ptr(),
                  grad_x.data_ptr(), grad_h_init.data_ptr(), seq_size, batch_size, hidden_size],
            stream=stream,
        )

        # Always return one entry per possible forward input. autograd accepts
        # surplus trailing ``None`` gradients, whereas returning only two values
        # fails when ``hidden_init=None`` was passed explicitly to ``apply``.
        if hidden_init is not None:
            return grad_f, grad_x, grad_h_init
        return grad_f, grad_x, None

It could have been done in a cleaner way, though.

Hi all,
Since this issue is already open, I would like to ask my question here. I am getting the same error that is mentioned in the title of this issue. My network is this (it is basically a gradient reversal layer):

# Gradient reversal layer written in the legacy autograd style: forward and
# backward are instance methods and the scale factor lives on the instance.
class GradReverse(Function):
    def __init__(self, lambd):
        # lambd: factor applied (negated) to the gradient in backward().
        self.lambd = lambd
    #@staticmethod
    def forward(self, x):
        # Forward pass returns x unchanged (as a view with its own shape).
        return x.view_as(x)
    #@staticmethod
    def backward(self, grad_output):
        # Backward pass multiplies the incoming gradient by -lambd.
        return (grad_output * -self.lambd)

I got the error and followed the example as shown in the link, and I tried writing in the modified format as given below:

# Attempted port of the gradient reversal layer to static forward/backward.
class GradReverse(Function):
    def __init__(self, lambd):
        # lambd is stored on an instance here, but the static methods below
        # never receive that instance.
        self.lambd = lambd
    @staticmethod
    def forward(ctx, x):
        # Returns x unchanged (as a view) and saves that view on the context.
        result = x.view_as(x)
        ctx.save_for_backward(result)
        return result
    @staticmethod
    def backward(ctx, grad_output):
        # NOTE(review): `self` is not a name in this static method, so this
        # line raises NameError; only `ctx` and `grad_output` are available.
        return (grad_output * -self.lambd)

Then I get the error that self is not defined. Can someone please help me restructure my old layer according to the new method? I have been stuck on this for quite a while. :confused:
Any help would be greatly appreciated.
Thanks,
Megh

The example in this post might help:

It seems that in the new style you would remove `__init__()` entirely and move whatever arguments it took to the `forward()` function.

Something like this, but I am not entirely sure. I am also looking for a way to update old PyTorch code:

@staticmethod
def forward(ctx, x, lambd):
    """Return ``x`` unchanged and remember ``lambd`` on ``ctx`` for backward."""
    ctx.lambd = lambd
    return x.view_as(x)

@staticmethod
def backward(ctx, grad_output):
    """Return the gradient scaled by ``-lambd``, and ``None`` for ``lambd``."""
    reversed_grad = grad_output * (-ctx.lambd)
    # One entry per forward input: a gradient for x, none for lambd.
    return reversed_grad, None
2 Likes

Thanks. I got my answer here: Custom autograd.Function: must it be static?

2 Likes

This is also correct. Thank you.

I’m trying to run the code below and getting errors:

The code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

import cv2
from torch.autograd import Variable
import torch
from data import BaseTransform, VOC_CLASSES as labelmap
from ssd import build_ssd
import imageio

def detect(frame, net, transform):
    """Run the detector on one frame and draw every confident detection on it.

    Args:
        frame: image array; ``frame.shape[:2]`` is read as ``(height, width)``.
        net: callable detector whose output exposes ``.data`` laid out as
            ``[batch, class, occurrence, (score, x0, y0, x1, y1)]``.
        transform: callable whose first return value is the preprocessed frame
            as a NumPy array.

    Returns:
        The same ``frame`` object, annotated in place with boxes and labels.
    """
    height, width = frame.shape[:2]
    frame_t = transform(frame)[0]
    x = torch.from_numpy(frame_t).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    y = net(x)
    detections = y.data
    # Box coordinates are multiplied by this to map them to pixel positions.
    scale = torch.Tensor([width, height, width, height])
    # detections = [batch, number of classes, number of occurrences, (score, x0, y0, x1, y1)]
    max_occurrences = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Stop at the last occurrence slot as well as at the score threshold,
        # otherwise a class whose slots all score >= 0.6 indexes out of range.
        while j < max_occurrences and detections[0, i, j, 0] >= 0.6:
            pt = (detections[0, i, j, 1:] * scale).numpy()
            top_left = (int(pt[0]), int(pt[1]))
            bottom_right = (int(pt[2]), int(pt[3]))
            cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)
            # putText takes a single origin point followed by the font; the
            # extra corner tuple in the original shifted every later argument.
            cv2.putText(frame, labelmap[i - 1], top_left,
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
            j += 1
    return frame

# Build the SSD in test mode and load the pretrained weights onto the CPU.
net = build_ssd('test')
net.load_state_dict(torch.load('ssd300_mAP_77.43_v2.pth', map_location=lambda storage, loc: storage))
# Switch to evaluation mode once instead of on every frame.
net.eval()

transform = BaseTransform(net.size, (104/256.0, 117/256.0, 123/256.0))

# Run detection on every frame of the input video and write the annotated
# frames to a new video with the same frame rate.
reader = imageio.get_reader('video_teste01ssd.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.mp4', fps=fps)
try:
    for i, frame in enumerate(reader):
        frame = detect(frame, net, transform)
        writer.append_data(frame)
        print(i)
finally:
    # Close the writer even if a frame fails, so the output file is finalized.
    writer.close()


The error is:

RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: Automatic differentiation package - torch.autograd — PyTorch 1.8.0 documentation)

Could someone help me, please?

I don’t see any custom autograd.Function implementation in the posted code snippet, so I assume the model or any other imported method uses it.
If you follow the link in the error message, you’ll get some information how this function should be implemented and would have to fix it.

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

Hi everyone,
My problem is similar, but none of these solutions resolved it.
This is my code:

import torch.nn as nn
from torch.nn.modules.module import Module
from torch.autograd import Function
import correlation_cuda

class CorrelationFunction(Function):
    """Autograd wrapper around the ``correlation_cuda`` extension (legacy style).

    The layer configuration is stored on the instance in ``__init__`` and
    ``forward``/``backward`` are instance methods, i.e. the function is used as
    ``CorrelationFunction(...)(input1, input2)``.

    NOTE(review): ``torch`` is referenced below, but the imports shown with this
    snippet only bring in ``torch.nn`` -- confirm ``import torch`` exists.
    """

    def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        super(CorrelationFunction, self).__init__()
        # Configuration values forwarded unchanged to the CUDA extension.
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply
        # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1)
        
    
    def forward(self, input1, input2):
        # Keep both inputs; backward() hands them to the extension again.
        self.save_for_backward(input1, input2)

        with torch.cuda.device_of(input1):
            # Empty tensors of the inputs' type; presumably resized and filled
            # by the extension -- TODO confirm against correlation_cuda.
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        return output
        
    
    def backward(self, grad_output):
        input1, input2 = self.saved_tensors

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            # Gradient buffers, one per forward input; handed to the extension
            # together with grad_output.
            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)

        return grad_input1, grad_input2


class Correlation(nn.Module):
    """``nn.Module`` wrapper that stores the configuration and runs ``CorrelationFunction``."""

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        # Configuration values passed on to CorrelationFunction in forward().
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply
        
    
    def forward(self, input1, input2):

        # Builds a fresh CorrelationFunction instance on every call and invokes
        # it directly on the two inputs (the legacy calling convention).
        result=CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2)
        #result=out.apply(input1, input2)

        return result

And this is how I call it:

# Correlation layer whose padding and maximum displacement both equal the
# search range, with unit kernel size and unit strides.
corr = Correlation(
    pad_size=self.search_range,
    kernel_size=1,
    max_displacement=self.search_range,
    stride1=1,
    stride2=1,
    corr_multiply=1,
)
out_corr = corr(x1, x2_warp)

First I got [Legacy autograd function with non-static forward method is deprecated]. Then I added the @staticmethod decorator, but I got this error:

out_corr = self.corr(x1, x2_warp)
  File "/home/ubuntu/anaconda3/envs/new/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'input2'

Then I tried calling .apply instead:

out_corr = self.corr.apply(x1, x2_warp)
but I got this error 
out_corr = corr.apply(x1, x2_warp)
TypeError: apply() takes 2 positional arguments but 3 were given

This code was working fine with torch 1.1 and CUDA 10.0,
but now I have an RTX 3090 GPU, which doesn’t support that torch version, so I am now using CUDA 11.1 and torch 1.9.
What should I do to make this code work with this torch version without any errors?
Thank you in advance.

Hey, I am getting this error: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function )
How can I solve it?
# Detection post-processing: softmax over the class scores, decode the boxes,
# keep scores above conf_thresh and run nms per class.
# NOTE(review): this paste has lost the indentation of the class body, and
# `init` is presumably `__init__` with the underscores eaten by the forum
# formatting -- confirm against the real file.
class Detect(Function):
def init(self, conf_thresh=0.01, top_k=200, nsm_thresh=0.45):
self.softmax = nn.Softmax(dim=-1)# take the softmax over the last dim
self.conf_thresh = conf_thresh
self.top_k = top_k
self.nms_thresh = nsm_thresh

# NOTE(review): forward is declared static yet reads self.softmax,
# self.conf_thresh, self.top_k and self.nms_thresh, which are only assigned in
# the initializer above; with @staticmethod, `self` is just the first
# positional argument supplied by the caller.
@staticmethod
def forward(self, loc_data, conf_data, dbox_list):
    num_batch = loc_data.size(0)  # batch_size (2,4,6,...32, 64, 128)
    num_dbox = loc_data.size(1)  # 8732
    num_classe = conf_data.size(2)  # 21

    conf_data = self.softmax(conf_data)
    # (batch_num, num_dbox, num_class) -> (batch_num, num_class, num_dbox)
    conf_preds = conf_data.transpose(2, 1)

    # Result buffer: for every image and class, up to top_k rows of
    # [score, 4 box values].
    output = torch.zeros(num_batch, num_classe, self.top_k, 5)

    # process each image in the batch
    for i in range(num_batch):
        # compute the bounding boxes from the offset information and the default boxes
        decode_boxes = decode(loc_data[i], dbox_list)

        # copy the confidence scores of the i-th image
        conf_scores = conf_preds[i].clone()

        # class index 0 is skipped by starting the range at 1
        for cl in range(1, num_classe):
            c_mask = conf_scores[cl].gt(self.conf_thresh)  # keep only confidences > 0.01
            scores = conf_scores[cl][c_mask]
            if scores.nelement() == 0:  # numel()
                continue

            # expand the mask to the same shape as decode_boxes for the selection
            l_mask = c_mask.unsqueeze(1).expand_as(decode_boxes)  # (8732, 4)
            boxes = decode_boxes[l_mask].view(-1, 4)  # (number of boxes with confidence > 0.01, 4)
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    return output

The link as well as this tutorial show how to write the new autograd.Functions and give you some examples.
In your code you are initializing data in the __init__ method, which is wrong.

By the way, if you want to directly affect the module using .apply(x) method when looping through self.features._modules.items(), use the code below:

# Swap every entry of self.features._modules for the autograd function's
# ``apply`` callable; only the keys are needed, so iterate over a snapshot of them.
for idx in list(self.features._modules):
    self.features._modules[idx] = YourTorchAutogradFunction.apply

Dear Muhamed,
I would like to get in contact with you concerning this topic, please. Thank you.