Autograd.grad returns different values for the same inputs

I am trying to calculate the gradient of a network output with respect to some input points using torch.autograd.grad().
The code is the following:

for batch in data_loader:
  batch_coordinates = batch[0]
  # requires_grad_() flags batch_coordinates in place, so the forward pass
  # below is tracked by autograd with respect to batch_coordinates.
  out = model(batch_coordinates.requires_grad_())

  # x1 and y1 are new tensors derived from batch_coordinates *after* the
  # forward pass: `out` was computed from batch_coordinates, not from x1/y1.
  x1, y1 = batch_coordinates[:, 0].reshape(-1, 1), batch_coordinates[:, 1].reshape(-1, 1)

  y = custom_output(out, x1, y1)
  # Gradient w.r.t. batch_coordinates: includes both the path through the
  # model (via `out`) and the explicit use of x1/y1 inside custom_output.
  grad_1 = torch.autograd.grad(y.sum(), batch_coordinates, create_graph=True)  # calculate gradient
  # Gradient w.r.t. (x1, y1): only the explicit use of x1/y1 inside
  # custom_output reaches these tensors, so the path through the model is
  # not included. Returns a tuple with one tensor per input.
  grad_2 = torch.autograd.grad(y.sum(), (x1, y1), create_graph=True)  # calculate gradient

Here batch_coordinates is a torch tensor of size torch.Size([n, 2]), where n is the number of points, and the model is a very simple three-layered NN. custom_output(out, x1, y1) is a function that I need in order to interpolate the result of the network; it takes as input the x, y coordinates of the points and the output of the network. My question is: why are grad_1 and grad_2 different (accounting for the fact that grad_2 is a tuple of two tensors, one per coordinate)?

The output is:

GRAD MINIBATCH COORDINATES: 
(tensor([[-6.5256e-03,  1.5925e-03],
        [-6.1777e-03, -8.8977e-05],
        [-2.5146e-03,  3.1782e-03],
        [-4.3833e-03,  2.2729e-03],
        [-6.6386e-03,  3.9808e-04],
        [-5.9642e-03, -2.2408e-04],
        [ 1.0123e-03,  4.7309e-03],
        [ 3.8920e-02, -1.3823e-01],
        [-6.2750e+00,  1.4732e+00],
        [-5.8813e-03, -2.6810e-04]], grad_fn=<AddBackward0>),)
GRAD X1, Y1: 
(tensor([[ 0.0000e+00],
        [ 0.0000e+00],
        [ 0.0000e+00],
        [ 0.0000e+00],
        [-2.0982e-05],
        [ 0.0000e+00],
        [-1.5622e-04],
        [ 4.5191e-02],
        [-6.2711e+00],
        [ 0.0000e+00]], grad_fn=<AddBackward0>), tensor([[ 0.0000e+00],
        [ 0.0000e+00],
        [ 0.0000e+00],
        [ 0.0000e+00],
        [ 7.6683e-06],
        [ 0.0000e+00],
        [-2.7294e-05],
        [-1.3825e-01],
        [ 1.4730e+00],
        [ 0.0000e+00]], grad_fn=<AddBackward0>))

Could you post a minimal and executable code snippet reproducing the output, please?

This is the definition of the custom output function and of the NN:

def custom_output(o, x, y):
    """Blend the network output ``o`` with two smooth disc-shaped masks.

    Two masks are built from steep ``tanh`` profiles centred at
    ``(-0.5, 1)`` (mask A) and ``(0.5, 1)`` (mask B). Each mask is close to
    1 near its centre, equals 0.5 at distance ``0.1 + 0.02`` from it, and
    decays to 0 further away.

    The returned value is ``(1 - A) * ((1 - B) * o + B)``: it tends to 0
    where mask A is active, to 1 where only mask B is active, and to ``o``
    where both masks vanish.

    Args:
        o: network output, broadcastable against ``x`` and ``y``.
        x: x-coordinates of the points.
        y: y-coordinates of the points.

    Returns:
        The blended output, with the broadcast shape of the inputs.
    """
    # Squared radius at which each mask crosses 0.5.
    radius_sq = (0.1 + 0.02)**2

    def mask(center):
        # Smooth indicator of the disc around `center`; the factor 100 makes
        # the transition across the disc boundary steep.
        dist_sq = (x - center[0])**2 + (y - center[1])**2
        return 1/2 - 1/2 * torch.tanh(100*(dist_sq - radius_sq))

    mask_a = mask([-0.5, 1])
    mask_b = mask([0.5, 1])

    # Inside B force the value towards 1, then inside A force it towards 0.
    blended_b = (1 - mask_b) * o + mask_b
    return (1 - mask_a) * blended_b

def init_weights(m):
    """Initialise an ``nn.Linear`` module in place; ignore any other module.

    Weights are drawn with Xavier/Glorot uniform initialisation and the bias,
    when the layer has one, is set to zero. Written to be passed to
    ``nn.Module.apply`` so it can be called on every submodule of a network.

    Args:
        m: any module; only instances of ``nn.Linear`` are modified.
    """
    if not isinstance(m, nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    # nn.Linear(..., bias=False) stores ``bias`` as None, so guard before
    # touching it. zeros_ fills in place without going through ``.data``.
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)

class NN(nn.Module):
    """Small fully connected network with a single output in (0, 1).

    Layer widths are ``input_size -> 10 -> 20 -> 20 -> 1``. The three hidden
    layers use ``tanh`` and the output layer uses ``sigmoid``.
    """

    def __init__(self, input_size):
        """Create the layers; ``input_size`` is the number of input features."""
        super().__init__()

        self.hidden1 = nn.Linear(input_size, 10)
        self.hidden2 = nn.Linear(10, 20)
        self.hidden3 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 1)

    def forward(self, x):
        """Return the sigmoid output for input ``x`` of shape (..., input_size)."""
        # NOTE: the author considered sigmoid as an alternative to tanh for
        # the hidden activations.
        for hidden in (self.hidden1, self.hidden2, self.hidden3):
            x = torch.tanh(hidden(x))
        return torch.sigmoid(self.output(x))

To generate the input points and initialize the model:

# Network taking the two coordinates (x, y) of a point as input.
model = NN(2)

# Ten random points: x uniform in [-1, 1), y uniform in [0, 2).
x_input = 2 * torch.rand(10, 1) - 1
y_input = 2 * torch.rand(10, 1)
coordinates = torch.cat([x_input, y_input], dim=1)
plt.scatter(x_input.detach().numpy(), y_input.detach().numpy())

# batch_size is larger than the number of points, so the loader yields a
# single shuffled batch containing all ten points.
batch_size = 32
dataset = TensorDataset(coordinates)
data_loader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

Finally, calculate the gradient of the points:

for batch in data_loader:
  batch_coordinates = batch[0]
  print(f'MINIBATCH COORDINATES: \n{batch_coordinates}, \n {batch_coordinates.shape}')
  
  # requires_grad_() flags batch_coordinates in place, so the forward pass
  # below is tracked by autograd with respect to batch_coordinates.
  out = model(batch_coordinates.requires_grad_())

  # x1 and y1 are new tensors derived from batch_coordinates *after* the
  # forward pass: `out` was computed from batch_coordinates, not from x1/y1.
  x1, y1 = batch_coordinates[:, 0].reshape(-1, 1), batch_coordinates[:, 1].reshape(-1, 1)
  print(f'X1, Y1 SEPARATED COORDINATES: \n{x1, y1}')

  y = custom_output(out, x1, y1)
  print(f'NN OUTPUT: \n{y}')
  # Gradient w.r.t. batch_coordinates: includes both the path through the
  # model (via `out`) and the explicit use of x1/y1 inside custom_output.
  grad = torch.autograd.grad(y.sum(), batch_coordinates, create_graph=True)  # calculate gradient
  print(f'GRAD MINIBATCH COORDINATES: \n{grad}')
  # Gradient w.r.t. (x1, y1): only the explicit use of x1/y1 inside
  # custom_output reaches these tensors, so the path through the model is
  # not included. Returns a tuple with one tensor per input.
  grad = torch.autograd.grad(y.sum(), (x1, y1), create_graph=True)  # calculate gradient
  print(f'GRAD X1, Y1: \n{grad}')