When I write cpp extension for custom cudnn convolution, I use nn.autograd and nn.Module wrap my cpp extension.
autograd wraper code in Cudnn_conv2d_func.py file like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import math
import cudnn_conv2d
class Cudnn_conv2d_func(Function):
@staticmethod
def forward(ctx, input, weight, bias=None, stride=(1, 1), padding=(0, 0), dilation=(1, 1), groups=1, benchmark=True, deterministic=False):
ctx.stride = stride
ctx.padding= padding
ctx.dilation = dilation
ctx.groups = groups
ctx.benchmark = benchmark
ctx.deterministic = deterministic
ctx.save_for_backward(input, weight)
stride_width, stride_height = stride[0], stride[1]
padding_width, padding_height = padding[0], padding[1]
dilation_width, dilation_height = dilation[0], dilation[1]
return cudnn_conv2d.conv2d(input, weight, bias, stride_width, stride_height, padding_width,
padding_height, dilation_width, dilation_height, groups, benchmark, deterministic)
@staticmethod
def backward(ctx, grad_output):
input, weight = ctx.saved_variables
print(ctx.needs_input_grad)
grad_input, grad_weight, grad_bias = cudnn_conv2d.conv2d_backward(
input, grad_output, weight,
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.groups, ctx.benchmark, ctx.deterministic,
ctx.needs_input_grad[:3])
return grad_input, grad_weight, None, None, None, None, None, None
nn.Module wraper code in Cudnn_conv2d.py file like this:
import torch
from torch.nn.parameter import Parameter
from torch.nn import init
from torch.nn.modules import Module
from torch.nn.modules.utils import _single, _pair, _triple
from torch._jit_internal import List
from .Cudnn_conv2d_func import Cudnn_conv2d_func
import math
class _ConvNd(Module):
__constants__ = ['stride', 'padding', 'dilation', 'groups', 'bias',
'padding_mode', 'output_padding', 'in_channels',
'out_channels', 'kernel_size']
def __init__(self, in_channels, out_channels, kernel_size, stride,
padding, dilation, transposed, output_padding,
groups, bias, padding_mode):
super(_ConvNd, self).__init__()
if in_channels % groups != 0:
raise ValueError('in_channels must be divisible by groups')
if out_channels % groups != 0:
raise ValueError('out_channels must be divisible by groups')
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.transposed = transposed
self.output_padding = output_padding
self.groups = groups
self.padding_mode = padding_mode
if transposed:
self.weight = Parameter(torch.Tensor(
in_channels, out_channels // groups, *kernel_size))
else:
self.weight = Parameter(torch.Tensor(
out_channels, in_channels // groups, *kernel_size))
if bias:
self.bias = Parameter(torch.Tensor(out_channels))
else:
# self.register_parameter('bias', None)
self.bias = Parameter(torch.zeros(out_channels))
self.reset_parameters()
def reset_parameters(self):
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
init.uniform_(self.bias, -bound, bound)
def extra_repr(self):
s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
', stride={stride}')
if self.padding != (0,) * len(self.padding):
s += ', padding={padding}'
if self.dilation != (1,) * len(self.dilation):
s += ', dilation={dilation}'
if self.output_padding != (0,) * len(self.output_padding):
s += ', output_padding={output_padding}'
if self.groups != 1:
s += ', groups={groups}'
if self.bias is None:
s += ', bias=False'
if self.padding_mode != 'zeros':
s += ', padding_mode={padding_mode}'
return s.format(**self.__dict__)
def __setstate__(self, state):
super(_ConvNd, self).__setstate__(state)
if not hasattr(self, 'padding_mode'):
self.padding_mode = 'zeros'
class Cudnn_conv2d(_ConvNd):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1,
bias=True, padding_mode='zeros'):
kernel_size = _pair(kernel_size)
stride = _pair(stride)
padding = _pair(padding)
dilation = _pair(dilation)
super(Cudnn_conv2d, self).__init__(
in_channels, out_channels, kernel_size, stride, padding, dilation,
False, _pair(0), groups, bias, padding_mode)
def conv2d_forward(self, input, weight):
if self.padding_mode == 'circular':
expanded_padding = ((self.padding[1] + 1) // 2, self.padding[1] // 2,
(self.padding[0] + 1) // 2, self.padding[0] // 2)
return Cudnn_conv2d_func.apply(F.pad(input, expanded_padding, mode='circular'), weight, self.bias, self.stride, _pair(0), self.dilation, self.groups)
return Cudnn_conv2d_func.apply(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
def forward(self, input):
return self.conv2d_forward(input, self.weight)
if __name__=='__main__':
conv = Cudnn_conv2d(3, 3, 3).cuda()
print(conv)
x = torch.rand([128, 3, 32, 32],requires_grad=True, device='cuda')
y = conv(x)
y = y.sum()
print(y)
y.backward()
When I run Cudnn_conv2d.py, I can get right result. But When I use Cudnn_conv2d instead nn.Conv2d in net. I meet Error like this;
ctx.needs_input_grad[:3])
RuntimeError: Expected tensor's dynamic type to be Variable, not Tensor
I am confusing. Net code look like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
from .Cudnn_conv2d import Cudnn_conv2d
class TestNet1(nn.Module):
def __init__(self):
super().__init__()
self.conv1=Cudnn_conv2d(3,32,5,bias=False)
self.conv2=Cudnn_conv2d(32,32,3,bias=False)
self.conv3=Cudnn_conv2d(32,32,3,bias=False)
self.bn = nn.BatchNorm2d(32)
self.fc1=nn.Linear(16*32,10)
def forward(self,x):
x=self.conv1(x) #28
x=F.relu(x)
x=F.max_pool2d(x,2,2) #14
x=self.conv2(x) # 12
x = self.bn(x)
x=F.relu(x)
x=F.max_pool2d(x,2,2) # 6
x=self.conv3(x) # 4
x=x.view(x.size(0),-1)
x=self.fc1(x)
return x
dataset is cifar10.
Does anybody know where to look to debug?
Thanks!