Hi everyone.
My code is further below. The input size of the model is 8, which is the concatenation of two tensors of size 5 and size 3. I want to do just one forward pass through the model and one backward pass, and get two gradients: one w.r.t. the first part of the input (size 5) and one w.r.t. the second part (size 3). The batch size is 5.
I am new to PyTorch and would appreciate any help.
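To make the goal concrete, here is a toy illustration of the two gradients I mean (everything in it is made up for illustration; it is not my real model or data):

import torch

a = torch.randn(5, 5, requires_grad=True)   # first part,  shape (batch, 5)
b = torch.randn(5, 3, requires_grad=True)   # second part, shape (batch, 3)
x = torch.cat([a, b], dim=1)                # concatenated input, shape (batch, 8)
y = (x ** 2).sum()                          # stands in for model + loss
y.backward()                                # one backward pass ...
print(a.grad.shape)                         # ... torch.Size([5, 5]): gradient w.r.t. the size-5 part
print(b.grad.shape)                         #     torch.Size([5, 3]): gradient w.r.t. the size-3 part

And here is my actual code: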
import torch
import torch.nn as nn

class Net(nn.Module):
    """
    A simple multilayer perceptron with one hidden layer.
    """
    def __init__(self, num_input, num_hidden, num_output, dropout,
                 activation='tanh'):
        super(Net, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(num_input, num_hidden)
        self.fc2 = nn.Linear(num_hidden, num_output)
        if activation == 'tanh':
            self.activation_f = torch.tanh
        elif activation == 'relu':
            self.activation_f = torch.relu

    def forward(self, x):
        x = self.activation_f(self.fc1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc2(x))
        return x
model = Net(num_input=8, num_hidden=4, num_output=2, dropout=0.0, activation='tanh')

# loss_fn, optimizer and data_loader are defined elsewhere in my script
for batch_idx, (data, label) in enumerate(data_loader):
    data = data.clone().detach().requires_grad_(True)
    print('data:', data)
    x1 = data[:, :5]                          # first 5 features
    x1 = x1.clone().detach().requires_grad_(True)
    x2 = data[:, 5:8]                         # last 3 features
    print('x1:', x1)
    print('x2:', x2)
    output = model(data)
    print('output:', output)
    loss = loss_fn(pred=output, target=label)
    print('loss:', loss)
    grad_tensor_1 = torch.autograd.grad(outputs=loss, inputs=x1, allow_unused=True)
    print('gradient of x1: ', grad_tensor_1)
    grad_tensor_2 = torch.autograd.grad(outputs=loss, inputs=x2, allow_unused=True)
    print('gradient of x2: ', grad_tensor_2)
    loss.backward()
    optimizer.step()
I got the following output:
label tensor([1, 0, 1, 0, 0])
data: tensor([[0.4777, 0.5840, 0.5885, 0.5624, 0.5781, 0.5119, 0.5466, 0.3983],
[0.6305, 0.6040, 0.4076, 0.5833, 0.4156, 0.5065, 0.4737, 0.4937],
[0.5168, 0.5520, 0.6126, 0.5375, 0.5495, 0.4984, 0.4365, 0.4092],
[0.4785, 0.5723, 0.5018, 0.5540, 0.4677, 0.5447, 0.3886, 0.5680],
[0.5685, 0.6240, 0.3949, 0.5555, 0.4099, 0.5065, 0.4737, 0.4937]],
requires_grad=True)
x1: tensor([[0.4777, 0.5840, 0.5885, 0.5624, 0.5781],
[0.6305, 0.6040, 0.4076, 0.5833, 0.4156],
[0.5168, 0.5520, 0.6126, 0.5375, 0.5495],
[0.4785, 0.5723, 0.5018, 0.5540, 0.4677],
[0.5685, 0.6240, 0.3949, 0.5555, 0.4099]], requires_grad=True)
x2: tensor([[0.5119, 0.5466, 0.3983],
[0.5065, 0.4737, 0.4937],
[0.4984, 0.4365, 0.4092],
[0.5447, 0.3886, 0.5680],
[0.5065, 0.4737, 0.4937]], grad_fn=<SliceBackward>)
output: tensor([[0.5724, 0.3881],
[0.5688, 0.3966],
[0.5680, 0.3904],
[0.5701, 0.3887],
[0.5696, 0.3944]], grad_fn=<SigmoidBackward>)
loss: tensor(0.6804, grad_fn=<NllLossBackward>)
gradient w.r.t x1: (None,)
gradient w.r.t x2: (None,)
How can I get the gradients w.r.t. x1 and x2?
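Is the fix simply to keep x1 and x2 as separate leaf tensors and concatenate them before the forward pass, something like the sketch below? This is just my guess; the variable name inp and where I call zero_grad are my own assumptions, not something I have verified.

for batch_idx, (data, label) in enumerate(data_loader):
    # make the two parts leaf tensors BEFORE the forward pass
    x1 = data[:, :5].clone().detach().requires_grad_(True)
    x2 = data[:, 5:8].clone().detach().requires_grad_(True)

    # re-concatenate them so the graph runs from the loss back to x1 and x2
    inp = torch.cat([x1, x2], dim=1)

    output = model(inp)                       # one forward pass
    loss = loss_fn(pred=output, target=label)

    optimizer.zero_grad()
    loss.backward()                           # one backward pass
    print('gradient w.r.t x1:', x1.grad)      # shape (batch, 5)
    print('gradient w.r.t x2:', x2.grad)      # shape (batch, 3)
    optimizer.step()

Or, since data itself is already a leaf with requires_grad=True in my original loop, would it be equivalent to just call loss.backward() and slice data.grad into data.grad[:, :5] and data.grad[:, 5:8]?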