Gradient with respect to input

Hi,
Suppose I have a network with, say, 4 layers. How do I get the gradient that is passed back through the network,
i.e. dL/dx at the output of layer 3, layer 2, etc.?
Currently I can only get the gradients with respect to the weights and biases, not with respect to the intermediate activations x.

You need to use hooks. Register a hook on each intermediate tensor in forward(); the hook gets called with dL/d(that tensor) during the backward pass.

import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(128, 64)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(64, 64)
        self.relu2 = nn.ReLU()

        # when True, forward() attaches a hook to each intermediate tensor
        self.register_hook = False
        # one list of gradients per layer output, appended to on every backward pass
        self.hook = {'layer1': [], 'relu1': [], 'layer2': [], 'relu2': []}
        
    def forward(self, x):
        out_layer1 = self.layer1(x)
        out_relu1 = self.relu1(out_layer1)
        
        out_layer2 = self.layer2(out_relu1)
        out_relu2 = self.relu2(out_layer2)

        if self.register_hook:
            out_layer1.register_hook(lambda grad: self.hook_fn(grad=grad, name='layer1'))
            out_layer1.retain_grad()

            out_relu1.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu1'))
            out_relu1.retain_grad()

            out_layer2.register_hook(lambda grad: self.hook_fn(grad=grad, name='layer2'))
            out_layer2.retain_grad()

            out_relu2.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu2'))
            out_relu2.retain_grad()

        return out_relu2
    
    def hook_fn(self, grad, name):
        self.hook[name].append(grad)

    def reset_hook(self):
        self.hook = {'layer1':[], 'relu1':[], 'layer2':[], 'relu2':[]}
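
For example, a minimal sketch of how to drive it (the batch shape and the sum() loss are just placeholders, not part of your setup):

import torch

net = Net()
net.register_hook = True               # enable the hooks attached in forward()

x = torch.randn(8, 128)                # any batch of 128-dim inputs
out = net(x)                           # forward pass registers the hooks
out.sum().backward()                   # hooks fire here and fill net.hook

print(net.hook['layer1'][-1].shape)    # dL/d(layer1 output): torch.Size([8, 64])
print(net.hook['relu2'][-1].shape)     # dL/d(relu2 output):  torch.Size([8, 64])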

Do I need to set self.register_hook = True somewhere?
This is my current network:

import numpy as np
import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8192, 10)

        # fixed seeds so the weights match my NumPy reference implementation
        np.random.seed(1)
        self.fc1.weight.data = torch.FloatTensor(1e-3 * np.random.randn(8192, 10)).T
        np.random.seed(2)
        self.fc1.bias.data = torch.FloatTensor(np.zeros(10))

        np.random.seed(3)
        self.conv1.weight.data = torch.FloatTensor(1e-3 * np.random.randn(32, 3, 5, 5))
        self.conv1.bias.data = torch.FloatTensor(np.zeros(32))

        self.register_hook = False
        self.hook = {'conv1': [], 'relu': [], 'pool': [], 'fc1': []}

    def hook_fn(self, grad, name):
        self.hook[name].append(grad)

    def reset_hook(self):
        self.hook = {'conv1': [], 'relu': [], 'pool': [], 'fc1': []}

    def forward(self, x):
        step1 = self.conv1(x)
        step2 = self.relu(step1)
        step3 = self.pool(step2)
        step3_flatten = step3.reshape(len(x), -1)
        step4 = self.fc1(step3_flatten)

        if self.register_hook:
            # the names must match the keys of self.hook
            step1.register_hook(lambda grad: self.hook_fn(grad=grad, name='conv1'))
            step1.retain_grad()

            step2.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu'))
            step2.retain_grad()

            step3.register_hook(lambda grad: self.hook_fn(grad=grad, name='pool'))
            step3.retain_grad()

            step4.register_hook(lambda grad: self.hook_fn(grad=grad, name='fc1'))
            step4.retain_grad()
        return step4

The reason to have the register_hook flag is so you can turn the hooks off when you don't need them, but you may set it to True by default and access the gradients through net.hook.

Thanks. I will set self.register_hook = True.
Can I access the gradients with net.hook, or would it require other syntax?

net.hook is a dictionary, so just access the gradients of each layer by its key, e.g. net.hook['conv1'], which is a list with one gradient appended after each forward/backward pass.
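
Something like this, as a rough sketch; I'm assuming 32x32 inputs so that the flattened size works out to 32 * 16 * 16 = 8192:

import torch

net = Net()
net.register_hook = True
x = torch.randn(4, 3, 32, 32)          # assumed input size: 32x32 RGB images
out = net(x)
out.sum().backward()                   # dummy loss, just to trigger the hooks

grad_conv1 = net.hook['conv1'][-1]     # dL/d(conv1 output) from the latest backward pass
print(grad_conv1.shape)                # torch.Size([4, 32, 32, 32])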

Hi Kevy,
Is the dictionary storing the gradient values one step ahead? It looks a little out of sync when I compare against my numpy implementation.

It stores the gradients after each backward pass, so it shouldn't be out of sync; the hooks are re-registered on every forward pass and fire during the following backward pass. You can double-check by zeroing a layer's weights for one iteration: everything upstream of that layer should then get zero grads for that pass.
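
For example, a rough sketch of that check (zeroing fc1 in place, just for the test, with a random 32x32 input as above):

import torch

net = Net()
net.register_hook = True
x = torch.randn(4, 3, 32, 32)

with torch.no_grad():
    net.fc1.weight.zero_()                 # zero the last layer's weights for this test

net(x).sum().backward()
# the gradient flowing back through fc1 is grad_out @ W, so with W = 0
# everything upstream of fc1 should receive zeros
print(net.hook['pool'][-1].abs().max())    # expected: tensor(0.)
print(net.hook['conv1'][-1].abs().max())   # expected: tensor(0.)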