Gradient is None!

I am trying to compute the gradient of the model output w.r.t the input. The gradient should be calculated on some slices of the inputs and outputs as can be seen in the code below.
Briefly, the model is a time-series model in which the input is of a 3D shape and after passing to the model, the output will be of a 2D shape.
The initial shape of the data is input=(3150, 9) and output=(3150, 8)

# Training-loop excerpt: requests the gradient of the first output column
# w.r.t. one time step of the input batch, indexed after the forward pass.
for epoch in range(config["num_epochs"]):
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs) 
        
        # One single-column slice per output channel; the 0:1 style keeps the column axis.
        u1 = outputs[:, 0:1] 
        u2 = outputs[:, 1:2]
        u3 = outputs[:, 2:3]
        u4 = outputs[:, 3:4]
        u5 = outputs[:, 4:5]
        u6 = outputs[:, 5:6]
        u7 = outputs[:, 6:7]
        u8 = outputs[:, 7:8]
        
        # NOTE(review): this indexing runs after model(inputs), so last_input is a
        # separate tensor that the forward pass above never consumed.
        last_input = inputs[:, config["delay"], :]
        last_input.requires_grad = True
        
        # Debug output: autograd status of the differentiated tensors.
        print("u1 requires grad:", u1.requires_grad)
        print("is u1 leaf:", u1.is_leaf)
        print("last_input requires grad:", last_input.requires_grad)
        print("is last_input leaf:", last_input.is_leaf)
        
        ################################### output_1  ###############################################
        # allow_unused=True makes autograd.grad return None (instead of raising)
        # for an input that is not part of the graph that produced u1.
        du1 = torch.autograd.grad(u1, last_input, grad_outputs=torch.ones_like(u1), retain_graph=True, create_graph=True, allow_unused=True)[0]
        if du1 is None:
            print("Gradient is None!")
        else:
            print("Gradient is not None!")

After running the code, I get:

u1 requires grad: True
is u1 leaf: False
last_input requires grad: True
is last_input leaf: True
Gradient is None!
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-0115271f0e18> in <module>
    235 
    236         #print(du1.shape, du1.dtype)
--> 237         u1_t = du1[:, 0:1]
    238         u1_x1 = du1[:, 1:2]
    239         u1_x2 = du1[:, 2:3]

TypeError: 'NoneType' object is not subscriptable

So my question is: what am I doing wrong that prevents the gradient from being computed?

Your code is unfortunately not properly formatted so quite hard to read.
However, this line of code:

last_input.requires_grad = True

looks wrong, since you should not need to set .requires_grad = True on a tensor that is already attached to the computation graph. If this was done to fix another issue, you should check if and where the tensor is detached from the computation graph.

Sorry for the formatting; I did not know how to post code.
The thing is that if the inputs were 2D, the code below would work and compute the gradients. Here, however, I want to compute the gradient of the output w.r.t. a slice of the input (since it is 3D), and that gradient is always None!

class MLP(nn.Module):
    """Fully connected network that flattens each sample before the first layer.

    ``layer_sizes`` lists the width of every layer, input width first; each
    consecutive pair becomes one ``nn.Linear``. ``activation_fn`` is a
    zero-argument factory (e.g. ``nn.ReLU``) that is instantiated after every
    linear layer except the last one.
    """

    def __init__(self, layer_sizes, activation_fn):
        super().__init__()
        modules = []
        last_index = len(layer_sizes) - 2
        width_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for index, (fan_in, fan_out) in enumerate(width_pairs):
            modules.append(nn.Linear(fan_in, fan_out))
            # No activation after the output layer.
            if index < last_index:
                modules.append(activation_fn())
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Reshape ``x`` to ``(x.shape[0], -1)`` and apply every layer in order."""
        out = x.reshape(x.shape[0], -1)
        for module in self.layers:
            out = module(out)
        return out


# Training-loop excerpt (2D variant): the gradient of the first output column
# is taken w.r.t. the very tensor that is passed to the model.
for epoch in range(config["num_epochs"]):
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        # Flag the batch for autograd before the forward pass, so the same
        # tensor object is both fed to the model and differentiated below.
        inputs.requires_grad = True
        optimizer.zero_grad()
        outputs = model(inputs) 
        
        # First output column; the 0:1 slice keeps the column axis.
        u1 = outputs[:, 0:1] 
        
        
        ################################### output_1  ###############################################
        # grad_outputs of ones has u1's shape; [0] picks the gradient for `inputs`.
        du1 = torch.autograd.grad(u1, inputs, grad_outputs=torch.ones_like(u1), retain_graph=True, create_graph=True, allow_unused=True)[0]

Your code works fine for me:

class MLP(nn.Module):
    """Fully connected network that flattens each sample before the first layer.

    ``layer_sizes`` lists the width of every layer, input width first; each
    consecutive pair becomes one ``nn.Linear``. ``activation_fn`` is a
    zero-argument factory (e.g. ``nn.ReLU``) that is instantiated after every
    linear layer except the last one.
    """

    def __init__(self, layer_sizes, activation_fn):
        super().__init__()
        modules = []
        last_index = len(layer_sizes) - 2
        width_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for index, (fan_in, fan_out) in enumerate(width_pairs):
            modules.append(nn.Linear(fan_in, fan_out))
            # No activation after the output layer.
            if index < last_index:
                modules.append(activation_fn())
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Reshape ``x`` to ``(x.shape[0], -1)`` and apply every layer in order."""
        out = x.reshape(x.shape[0], -1)
        for module in self.layers:
            out = module(out)
        return out


# Minimal 2D check: with sizes [1, 1] the model is a single Linear(1, 1).
model = MLP([1, 1], nn.ReLU)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Flag the sample for autograd before it is fed to the model.
inputs = torch.randn(1, 1).requires_grad_(True)

optimizer.zero_grad()
outputs = model(inputs)
u1 = outputs[:, :1]  # first output column, column axis kept

# Differentiate u1 w.r.t. the tensor that actually went through the model.
du1 = torch.autograd.grad(
    outputs=u1,
    inputs=inputs,
    grad_outputs=torch.ones_like(u1),
)
print(du1)
# (tensor([[-0.6263]]),)

Yes, that code works for 2D inputs.
How about if inputs.shape == torch.Size([32, 6, 9]) and outputs.shape == torch.Size([32, 8]), and inside the loop inputs = inputs[:, 6, :]? In this case, it does not work.

Still works:

class MLP(nn.Module):
    """Fully connected network that flattens each sample before the first layer.

    ``layer_sizes`` lists the width of every layer, input width first; each
    consecutive pair becomes one ``nn.Linear``. ``activation_fn`` is a
    zero-argument factory (e.g. ``nn.ReLU``) that is instantiated after every
    linear layer except the last one.
    """

    def __init__(self, layer_sizes, activation_fn):
        super().__init__()
        modules = []
        last_index = len(layer_sizes) - 2
        width_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for index, (fan_in, fan_out) in enumerate(width_pairs):
            modules.append(nn.Linear(fan_in, fan_out))
            # No activation after the output layer.
            if index < last_index:
                modules.append(activation_fn())
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Reshape ``x`` to ``(x.shape[0], -1)`` and apply every layer in order."""
        out = x.reshape(x.shape[0], -1)
        for module in self.layers:
            out = module(out)
        return out


# 3D check: every (6, 9) sample is flattened to 54 features by MLP.forward.
model = MLP([6 * 9, 8], nn.ReLU)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Flag the whole batch for autograd before it is fed to the model.
inputs = torch.randn(32, 6, 9).requires_grad_(True)

optimizer.zero_grad()
outputs = model(inputs)
print(outputs.shape)
# torch.Size([32, 8])
u1 = outputs[:, :1]  # first output column, column axis kept

# Differentiate u1 w.r.t. the full input tensor that went through the model.
du1 = torch.autograd.grad(
    outputs=u1,
    inputs=inputs,
    grad_outputs=torch.ones_like(u1),
)
print(du1)
# (tensor([[[-0.1087, -0.0872,  0.1138,  ...,  0.0621,  0.0015, -0.0565],
#          [ 0.1140, -0.0478, -0.0525,  ...,  0.0496, -0.0913,  0.0653],
#          [ 0.0845,  0.0077,  0.0382,  ...,  0.0925, -0.0222,  0.0253],
#          [-0.0153,  0.0852, -0.0956,  ..., -0.0256, -0.0470,  0.1336],
#          [-0.0848,  0.0494, -0.1045,  ..., -0.0141,  0.0993, -0.0745],
#          [-0.0953,  0.0397,  0.0770,  ...,  0.0516,  0.0721,  0.0819]],

#         [[-0.1087, -0.0872,  0.1138,  ...,  0.0621,  0.0015, -0.0565],
#          [ 0.1140, -0.0478, -0.0525,  ...,  0.0496, -0.0913,  0.0653],
#          [ 0.0845,  0.0077,  0.0382,  ...,  0.0925, -0.0222,  0.0253],
#          [-0.0153,  0.0852, -0.0956,  ..., -0.0256, -0.0470,  0.1336],
#          [-0.0848,  0.0494, -0.1045,  ..., -0.0141,  0.0993, -0.0745],
#          [-0.0953,  0.0397,  0.0770,  ...,  0.0516,  0.0721,  0.0819]],

#         [[-0.1087, -0.0872,  0.1138,  ...,  0.0621,  0.0015, -0.0565],
#          [ 0.1140, -0.0478, -0.0525,  ...,  0.0496, -0.0913,  0.0653],
#          [ 0.0845,  0.0077,  0.0382,  ...,  0.0925, -0.0222,  0.0253],
#          [-0.0153,  0.0852, -0.0956,  ..., -0.0256, -0.0470,  0.1336],
#          [-0.0848,  0.0494, -0.1045,  ..., -0.0141,  0.0993, -0.0745],
#          [-0.0953,  0.0397,  0.0770,  ...,  0.0516,  0.0721,  0.0819]],

#         ...,

Instead of suggesting new shapes, please post a minimal and executable code snippet reproducing the issue.

This is what I meant

class MLP(nn.Module):
    """Fully connected network that flattens each sample before the first layer.

    ``layer_sizes`` lists the width of every layer, input width first; each
    consecutive pair becomes one ``nn.Linear``. ``activation_fn`` is a
    zero-argument factory (e.g. ``nn.ReLU``) that is instantiated after every
    linear layer except the last one.
    """

    def __init__(self, layer_sizes, activation_fn):
        super().__init__()
        modules = []
        last_index = len(layer_sizes) - 2
        width_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for index, (fan_in, fan_out) in enumerate(width_pairs):
            modules.append(nn.Linear(fan_in, fan_out))
            # No activation after the output layer.
            if index < last_index:
                modules.append(activation_fn())
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Reshape ``x`` to ``(x.shape[0], -1)`` and apply every layer in order."""
        out = x.reshape(x.shape[0], -1)
        for module in self.layers:
            out = module(out)
        return out



# Reproducer: differentiate the first output column w.r.t. a slice of the
# inputs that is taken AFTER the forward pass.
model = MLP([6*9, 8], nn.ReLU)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
inputs = torch.randn(32, 6, 9)
inputs.requires_grad = True
optimizer.zero_grad()
outputs = model(inputs)
# First output column; this definition is required for the snippet to run
# on its own (without it, the grad call below fails with a NameError).
u1 = outputs[:, 0:1]
# Indexing yields a separate tensor; the model consumed `inputs`, not this slice.
last_input = inputs[:, 5, :]
# allow_unused=True makes autograd.grad return None for a tensor that is not
# part of u1's graph instead of raising a RuntimeError.
du1 = torch.autograd.grad(u1, last_input, grad_outputs=torch.ones_like(u1), allow_unused=True)
print(du1)

# (None,)

You are slicing the inputs tensor, thus creating a new non-leaf tensor, which wasn’t used in the model’s forward pass.
Remove the allow_unused=True argument and you will see a proper error message:

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

If you are interested in a subset of the input, either slice it before passing the tensor to the model or slice the gradient afterwards.

Okay, Thank you very much