AutoGrad about the Conv2d

guangya_li · January 11, 2018, 3:02am

I have been reading the PyTorch source code, and I understand how autograd works for functions such as ReLU, sigmoid and so on: each of these functions has a forward and a backward function. However, I cannot find the backward function of conv2d. I would like to know how PyTorch performs the backward pass of conv2d.

SimonW · January 11, 2018, 3:16am

gpu thnn path: https://github.com/pytorch/pytorch/blob/bc7a41af7d541e64f8b8f7318a7a2248c0119632/aten/src/THCUNN/generic/SpatialConvolutionMM.cu#L211-L486 (first method is grad_input, and second is grad_weight)

cpu thnn path is in a similar place.

cudnn path: https://github.com/pytorch/pytorch/blob/bc7a41af7d541e64f8b8f7318a7a2248c0119632/aten/src/ATen/native/cudnn/Conv.cpp#L1048 (less informative as it just calls cudnn)

guangya_li · January 11, 2018, 3:46pm

Thanks for your response! I also have another question: is there any helpful documentation for reading the PyTorch source code? I ask because I want to quantize the weights or feature maps of a neural
network, as in BWN, BNN and so on.

SimonW · January 11, 2018, 4:19pm

I doubt this code will be helpful in achieving that. Can you do that directly from Python land, using conv_layer.weight etc.?

guangya_li · January 16, 2018, 1:35pm

I have found an example of a BNN project in PyTorch, but I am confused about how autograd proceeds in it. I just want to learn how the BNN algorithm is implemented in PyTorch. I have previously used the darknet framework to study the BNN algorithm: it uses temporary binarized weights for the forward pass, then runs the backward pass to update the float weights.

github.com

itayhubara/BinaryNet.pytorch/blob/master/models/binarized_modules.py

import torch
import pdb
import torch.nn as nn
import math
from torch.autograd import Variable
from torch.autograd import Function

import numpy as np


def Binarize(tensor, quant_mode='det'):
    """Binarize ``tensor`` to values in {-1, +1}.

    Args:
        tensor: Floating-point tensor to binarize. It is never modified;
            both modes return a freshly allocated tensor.
        quant_mode: ``'det'`` selects deterministic binarization via
            ``tensor.sign()`` (exact zeros therefore stay 0). Any other
            value selects stochastic binarization.

    Returns:
        A new tensor with the same shape, dtype and device as ``tensor``.
    """
    if quant_mode == 'det':
        return tensor.sign()

    # Stochastic mode: emit +1 with probability p = clip((x + 1) / 2, 0, 1)
    # and -1 otherwise. Out-of-place ops keep the caller's tensor intact.
    prob_plus = tensor.add(1).div(2).clamp(0, 1)
    # rand_like draws the noise with the same dtype and on the same device
    # as the input, so CUDA and double-precision tensors work as well.
    draws = torch.rand_like(prob_plus)
    # Map the boolean outcome {False, True} to {-1, +1}.
    return (draws < prob_plus).to(tensor.dtype).mul(2).sub(1)




class HingeLoss(nn.Module):

This file has been truncated. show original

SimonW · January 26, 2018, 5:01pm

You can think of autograd as reversely traversing a graph and calculating gradients for the leaf nodes, or even simpler as a blackbox algorithm for calculating gradients of a function.

Mostafa_Elhoushi · September 11, 2019, 12:14am

I think this is what you’re looking for:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.nn import init

import math
import numpy as np

class Conv2dShiftFunction(Function):
    """2-D convolution with an explicitly written backward pass.

    The forward pass delegates to ``F.conv2d``; the backward pass computes
    the gradients with ``torch.nn.grad.conv2d_input`` and
    ``torch.nn.grad.conv2d_weight``, which makes it a convenient starting
    point for customising how convolution gradients are calculated.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
        """Compute ``F.conv2d`` and stash what backward needs on ``ctx``.

        Args:
            ctx: Autograd context object.
            input: Input tensor passed to ``F.conv2d``.
            weight: Convolution kernel passed to ``F.conv2d``.
            bias: Optional bias tensor; may be ``None``.
            stride, padding, dilation, groups: Forwarded to ``F.conv2d``.

        Returns:
            The result of ``F.conv2d``.
        """
        # Tensors go through save_for_backward; the non-tensor
        # hyper-parameters are kept as plain attributes on ctx.
        ctx.save_for_backward(input, weight, bias)
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups

        return F.conv2d(input, weight, bias, stride, padding, dilation, groups)

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradients w.r.t. every argument of ``forward``.

        Args:
            ctx: Autograd context populated by ``forward``.
            grad_output: Gradient w.r.t. the output of ``forward``.

        Returns:
            A 7-tuple ``(grad_input, grad_weight, grad_bias, None, None,
            None, None)``; the four trailing ``None`` entries correspond to
            ``stride``, ``padding``, ``dilation`` and ``groups``.
        """
        input, weight, bias = ctx.saved_tensors
        stride = ctx.stride
        padding = ctx.padding
        dilation = ctx.dilation
        groups = ctx.groups
        grad_input = grad_weight = grad_bias = None

        # These needs_input_grad checks are optional and there only to
        # improve efficiency. If you want to make your code simpler, you can
        # skip them. Returning gradients for inputs that don't require it is
        # not an error.
        if ctx.needs_input_grad[0]:
            grad_input = torch.nn.grad.conv2d_input(
                input.shape, weight, grad_output, stride, padding, dilation, groups)
        if ctx.needs_input_grad[1]:
            grad_weight = torch.nn.grad.conv2d_weight(
                input, weight.shape, grad_output, stride, padding, dilation, groups)
        if bias is not None and ctx.needs_input_grad[2]:
            # Summing over the batch and both spatial dimensions leaves one
            # value per output channel. No squeeze afterwards: with a single
            # output channel it would collapse the result to a 0-d tensor
            # that no longer matches the shape of bias.
            grad_bias = grad_output.sum((0, 2, 3))

        return grad_input, grad_weight, grad_bias, None, None, None, None


class _ConvNdShift(nn.Module):
    """Base class holding the parameters and hyper-parameters of a conv layer.

    It validates the channel/group configuration, allocates the ``weight``
    and optional ``bias`` parameters, and initialises them. Subclasses are
    expected to provide ``forward``.
    """

    __constants__ = ['stride', 'padding', 'dilation', 'groups', 'bias', 'padding_mode']

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode, check_grad=False):
        """Store the configuration and create the parameters.

        Args:
            in_channels: Number of input channels; must be divisible by
                ``groups``.
            out_channels: Number of output channels; must be divisible by
                ``groups``.
            kernel_size: Sequence of kernel dimensions (unpacked into the
                weight shape).
            stride, padding, dilation, output_padding: Stored as given;
                ``extra_repr`` calls ``len()`` on the last three.
            transposed: If true, the weight is laid out as
                ``(in_channels, out_channels // groups, *kernel_size)``
                instead of ``(out_channels, in_channels // groups,
                *kernel_size)``.
            groups: Number of channel groups.
            bias: Whether to create a ``bias`` parameter.
            padding_mode: Stored as given.
            check_grad: If true, parameters are created in double precision,
                which numerical gradient checking requires.

        Raises:
            ValueError: If ``in_channels`` or ``out_channels`` is not
                divisible by ``groups``.
        """
        super(_ConvNdShift, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode

        # dtype=None makes torch.empty fall back to the default dtype.
        param_dtype = torch.double if check_grad else None

        if transposed:
            weight_shape = (in_channels, out_channels // groups, *kernel_size)
        else:
            weight_shape = (out_channels, in_channels // groups, *kernel_size)
        # The storage is uninitialised here; reset_parameters() fills it.
        self.weight = nn.Parameter(torch.empty(weight_shape, dtype=param_dtype))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_channels, dtype=param_dtype))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise ``weight`` and, if present, ``bias`` in place."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        if self.bias is not None:
            # The bias is drawn uniformly from [-1/sqrt(fan_in), 1/sqrt(fan_in)],
            # with fan_in taken from the weight tensor.
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def extra_repr(self):
        """Return the layer configuration string, omitting default values."""
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        # The plain attributes assigned in __init__ live in __dict__, so they
        # can be used directly as the format fields.
        return s.format(**self.__dict__)

class Conv2dShift(_ConvNdShift):
    """2-D convolution layer whose forward pass runs ``Conv2dShiftFunction``.

    Scalar ``kernel_size``, ``stride``, ``padding`` and ``dilation`` values
    are expanded to pairs with ``_pair`` before being handed to the base
    class.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', check_grad=False):
        """Create the layer.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            kernel_size: Kernel size; an int or a pair.
            stride, padding, dilation: An int or a pair each.
            groups: Number of channel groups.
            bias: Whether the layer has a bias parameter.
            padding_mode: ``'circular'`` pads the input explicitly in
                ``forward``; any other value passes ``padding`` straight to
                the convolution.
            check_grad: Forwarded to the base class.
        """
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        # transposed=False and output_padding=(0, 0): this is a regular
        # (non-transposed) convolution. check_grad must be passed on,
        # otherwise the base class always sees its default of False.
        super(Conv2dShift, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode, check_grad)

    def forward(self, input):
        """Apply the convolution to ``input`` and return the result."""
        if self.padding_mode == 'circular':
            # F.pad takes the padding for the last dimension first, hence
            # padding[1] before padding[0]. Each dimension's padding amount
            # is split across its two sides.
            # NOTE(review): this pads padding[i] in total per dimension,
            # whereas the non-circular branch hands padding[i] to the
            # convolution as-is -- confirm the size difference between the
            # two modes is intended.
            expanded_padding = ((self.padding[1] + 1) // 2, self.padding[1] // 2,
                                (self.padding[0] + 1) // 2, self.padding[0] // 2)

            # The input is already padded, so the convolution itself gets
            # zero padding.
            return Conv2dShiftFunction.apply(F.pad(input, expanded_padding, mode='circular'),
                            self.weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        else:
            return Conv2dShiftFunction.apply(input, self.weight, self.bias, self.stride,
                            self.padding, self.dilation, self.groups)