# Training loop that also demonstrates where autograd stores gradients:
# by default, .grad is populated only on LEAF tensors (tensors created by
# the user, not produced by an operation).
for batch_idx, (data, target) in enumerate(train_loader):
    meta_data = data  # same tensor object — plain assignment, no copy
    meta_data.requires_grad = True
    print(data.requires_grad)  # True: meta_data and data alias one tensor

    # .to(device) returns a NEW tensor that is a non-leaf node of the
    # autograd graph, so backward() would leave its .grad as None.
    data, target = data.to(device), target.to(device)
    # Explicitly ask autograd to keep the gradient on this non-leaf tensor.
    data.retain_grad()

    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, target)
    loss.backward()

    print(meta_data.grad)  # leaf tensor: gradient accumulated here
    print(data.grad)       # non-None now, thanks to retain_grad()
    # Device-agnostic comparison (was .cuda(); .to(device) also works on CPU).
    print((data - meta_data.to(device)).norm(2))
My question is: why is `meta_data.grad` not `None`, while `data.grad` is `None`? Is it because of moving the data to the GPU? Thanks in advance!