I read the source code of PyTorch, and I understand the autograd of functions like relu, sigmoid, and so on: each of these functions has a forward and a backward function. But I can't find the backward function of conv2d. I want to know how PyTorch does the backward pass of conv2d.
GPU THNN path: https://github.com/pytorch/pytorch/blob/bc7a41af7d541e64f8b8f7318a7a2248c0119632/aten/src/THCUNN/generic/SpatialConvolutionMM.cu#L211-L486 (the first method computes grad_input, and the second computes grad_weight)
The CPU THNN path is in a similar place.
cuDNN path: https://github.com/pytorch/pytorch/blob/bc7a41af7d541e64f8b8f7318a7a2248c0119632/aten/src/ATen/native/cudnn/Conv.cpp#L1048 (less informative, as it just calls into cuDNN)
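Note that the backward of conv2d is also exposed in Python via torch.nn.grad.conv2d_input and torch.nn.grad.conv2d_weight (the snippet further down this thread uses them). A minimal sketch, with made-up shapes, showing they agree with what autograd computes:

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8, requires_grad=True)
w = torch.randn(4, 3, 3, 3, requires_grad=True)
out = F.conv2d(x, w, padding=1)
grad_out = torch.randn_like(out)

# Gradients as autograd computes them...
grad_x_auto, grad_w_auto = torch.autograd.grad(out, (x, w), grad_out)

# ...and via the explicit conv2d backward helpers.
grad_x = torch.nn.grad.conv2d_input(x.shape, w, grad_out, padding=1)
grad_w = torch.nn.grad.conv2d_weight(x, w.shape, grad_out, padding=1)

print(torch.allclose(grad_x_auto, grad_x), torch.allclose(grad_w_auto, grad_w))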
Thanks for your response! I also have a question: is there any helpful documentation for reading the PyTorch source code? I ask because I want to quantize the weights or feature maps of a neural network, e.g. BWN, BNN, and so on.
I doubt this code will be helpful in achieving that. Can you do that directly from Python land using conv_layer.weight etc.?
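For example, a minimal sketch of what "directly from Python land" could look like, binarizing a layer's weights in place roughly in the spirit of BWN (the per-channel scaling here is an assumption for illustration, not taken from any particular reference implementation):

import torch
import torch.nn as nn

conv_layer = nn.Conv2d(3, 8, kernel_size=3)

with torch.no_grad():
    w = conv_layer.weight
    # Per-output-channel scale times the sign of each weight.
    alpha = w.abs().mean(dim=(1, 2, 3), keepdim=True)
    conv_layer.weight.copy_(alpha * w.sign())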
I found an example BNN project for PyTorch, but I am confused by the autograd part of it. I just want to learn how the BNN algorithm works in PyTorch. I have used the darknet framework to learn the BNN algorithm; it uses temporary binarized weights for the forward pass, then runs the backward pass to update the float weights.
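That forward-on-binarized, update-the-float-weights scheme is usually implemented with a straight-through estimator. A minimal sketch, assuming plain sign() binarization with the usual hard-tanh gradient clipping:

import torch
from torch.autograd import Function

class BinarizeSTE(Function):
    @staticmethod
    def forward(ctx, weight):
        ctx.save_for_backward(weight)
        return weight.sign()  # temporary binarized copy used in the forward pass

    @staticmethod
    def backward(ctx, grad_output):
        weight, = ctx.saved_tensors
        # Straight-through estimator: pass the gradient to the float
        # weights, zeroing it where the weights have saturated (|w| > 1).
        return grad_output * (weight.abs() <= 1).type_as(grad_output)

w_float = torch.randn(4, 3, 3, 3, requires_grad=True)
loss = BinarizeSTE.apply(w_float).sum()
loss.backward()                # w_float.grad now updates the float weights
print(w_float.grad.shape)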
You can think of autograd as traversing a graph in reverse and calculating gradients for the leaf nodes, or even more simply as a black-box algorithm for calculating gradients of a function.
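For example, treated as a black box:

import torch

x = torch.tensor(2.0, requires_grad=True)
y = x ** 3       # the graph is recorded as you compute
y.backward()     # reverse traversal fills in gradients at the leaves
print(x.grad)    # 3 * x**2 = 12.0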
I think this is what you’re looking for:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.nn import init
import math
import numpy as np
class Conv2dShiftFunction(Function):
    # Note that both forward and backward are @staticmethods
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
        output = F.conv2d(input, weight, bias, stride, padding, dilation, groups)
        ctx.save_for_backward(input, weight, bias)
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        # This is a very convenient pattern: at the top of backward,
        # unpack saved_tensors and initialize all gradients w.r.t. inputs to
        # None. Because additional trailing Nones are ignored, the return
        # statement is simple even when the function has optional inputs.
        input, weight, bias = ctx.saved_tensors
        stride = ctx.stride
        padding = ctx.padding
        dilation = ctx.dilation
        groups = ctx.groups
        grad_input = grad_weight = grad_bias = None

        # These needs_input_grad checks are optional and are there only to
        # improve efficiency. If you want to make your code simpler, you can
        # skip them. Returning gradients for inputs that don't require them
        # is not an error.
        if ctx.needs_input_grad[0]:
            grad_input = torch.nn.grad.conv2d_input(input.shape, weight, grad_output,
                                                    stride, padding, dilation, groups)
        if ctx.needs_input_grad[1]:
            grad_weight = torch.nn.grad.conv2d_weight(input, weight.shape, grad_output,
                                                      stride, padding, dilation, groups)
        if bias is not None and ctx.needs_input_grad[2]:
            # Sum over batch and spatial dimensions; the result has shape [out_channels].
            grad_bias = grad_output.sum((0, 2, 3))
        return grad_input, grad_weight, grad_bias, None, None, None, None
class _ConvNdShift(nn.Module):
    __constants__ = ['stride', 'padding', 'dilation', 'groups', 'bias', 'padding_mode']

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode, check_grad=False):
        super(_ConvNdShift, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode
        if check_grad:
            tensor_constructor = torch.DoubleTensor  # double precision is required by gradcheck
        else:
            tensor_constructor = torch.Tensor  # torch.Tensor is an alias for torch.FloatTensor
        if transposed:
            self.weight = nn.Parameter(tensor_constructor(
                in_channels, out_channels // groups, *kernel_size))
        else:
            self.weight = nn.Parameter(tensor_constructor(
                out_channels, in_channels // groups, *kernel_size))
        if bias:
            self.bias = nn.Parameter(tensor_constructor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
    def reset_parameters(self):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        # NOTE: the original snippet also initialized shift/sign parameters here
        # via a project-specific helper that is not defined in this post:
        #   self.shift.data, self.sign.data = get_shift_and_sign(self.weight)
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
    def extra_repr(self):
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        return s.format(**self.__dict__)
class Conv2dShift(_ConvNdShift):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', check_grad=False):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(Conv2dShift, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode,
            check_grad=check_grad)  # forward check_grad so gradcheck gets double tensors

    #@weak_script_method
    def forward(self, input):
        if self.padding_mode == 'circular':
            expanded_padding = ((self.padding[1] + 1) // 2, self.padding[1] // 2,
                                (self.padding[0] + 1) // 2, self.padding[0] // 2)
            return Conv2dShiftFunction.apply(F.pad(input, expanded_padding, mode='circular'),
                                             self.weight, self.bias, self.stride,
                                             _pair(0), self.dilation, self.groups)
        else:
            return Conv2dShiftFunction.apply(input, self.weight, self.bias, self.stride,
                                             self.padding, self.dilation, self.groups)
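To sanity-check the hand-written backward against numerical gradients, you can use gradcheck (this usage is my assumption, based on the check_grad flag above; gradcheck needs double precision):

from torch.autograd import gradcheck

conv = Conv2dShift(2, 3, kernel_size=3, padding=1, check_grad=True)
x = torch.randn(1, 2, 5, 5, dtype=torch.double, requires_grad=True)
ok = gradcheck(Conv2dShiftFunction.apply,
               (x, conv.weight, conv.bias,
                conv.stride, conv.padding, conv.dilation, conv.groups),
               eps=1e-6, atol=1e-4)
print(ok)  # True if the custom backward matches the numerical gradients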