Hi @albanD,
When I tried to include autograd.grad in backward as above, autograd.grad wasn’t returning anything even though it was getting executed. I don’t know why; could you please have a look?
Then I tried a different approach:
class Custom_Convolution(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input, weight, bias, stride, padding):  # input (from the previous layer) has shape ([batch_size=100, 96, 8, 8])
        with torch.enable_grad():
            output = torch.nn.functional.conv2d(input, weight, bias, stride, padding)
            h = output.shape[2]
            w = output.shape[3]
            # output from forward has shape ([batch_size, 128, 4, 4])

            output = output.view(output.shape[0], output.shape[1], -1)  # output shape = ([batch_size, 128, 16])

            cont_loss = torch.tensor([0.]).to(dev).requires_grad_(True)
            for i in range(0, output.shape[0]):
                for f in range(len(output[i])):
                    Zi_unnormalized = output[i][f]
                    Zi = torch.nn.functional.normalize(Zi_unnormalized, dim=0)
                    # Zj and Zk are tensors made from output[i][*] and output[other than i][*].
                    # Zj and Zk vary for each Zi (or f).

                    Zi_Zk = torch.Tensor([0]).to(dev).requires_grad_(True)
                    for k in Zk:
                        k = torch.nn.functional.normalize(k, dim=0)
                        zi_zk = ...
                        Zi_Zk = Zi_Zk.add(zi_zk)
                    # Similarly computing Zi_Zj
                    # Li = some algebra of Zi_Zj and Zi_Zk
                    # number of 'Li' values = output.shape[0] * output.shape[1]
                    cont_loss = cont_loss.add(Li)  # 1 value

            print("\n Loss: ", cont_loss, cont_loss.requires_grad)

            # weight1 = weight.clone().requires_grad_(True)
            # bias1 = bias.clone().requires_grad_(True)

            # weight.shape = ([128, 96, 5, 5])
            cont_loss_weight = torch.autograd.grad(outputs=cont_loss, inputs=weight, retain_graph=True)

            # bias.shape = ([128])
            cont_loss_bias = torch.autograd.grad(outputs=cont_loss, inputs=bias, retain_graph=True)

        output = output.view(output.shape[0], output.shape[1], h, w)
        ctx.save_for_backward(input, weight, bias, output, cont_loss, cont_loss_weight, cont_loss_bias)
        return output  # output's shape = ([batch_size=100, 128, 4, 4])
    @staticmethod
    def backward(ctx, grad_output):  # grad_output shape = ([batch_size, 128, 4, 4])
        input, weight, bias, output, cont_loss, cont_loss_weight, cont_loss_bias = ctx.saved_tensors  # input shape = ([batch_size, 96, 8, 8])
        grad_input = grad_weight = grad_bias = None

        if ctx.needs_input_grad[0]:
            grad_input = torch.nn.grad.conv2d_input(input.shape, weight, grad_output)  # shape = ([batch_size, 96, 8, 8])
        if ctx.needs_input_grad[1]:
            grad_weight = torch.nn.grad.conv2d_weight(input, weight.shape, grad_output)  # shape = ([128, 96, 5, 5])
            grad_weight += cont_loss_weight
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum((0, 2, 3))  # shape = ([128])
            grad_bias += cont_loss_bias

        if bias is not None:
            return grad_input, grad_weight, grad_bias, None, None
        else:
            return grad_input, grad_weight, None, None, None
Then I observed that cont_loss_weight is a tuple containing two tensors, each of shape ([96, 5, 5]). I expected it to be a single tensor of shape ([128, 96, 5, 5]) rather than a tuple, and similarly cont_loss_bias should have been a tensor of shape ([128]). I don’t know why!
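For reference, here is a minimal standalone check (illustrative shapes only, not my actual layer) of what torch.autograd.grad gives back for a plain conv2d; it returns a tuple with one entry per input tensor:

    import torch
    import torch.nn.functional as F

    # illustrative shapes only
    w = torch.randn(128, 96, 5, 5, requires_grad=True)
    b = torch.randn(128, requires_grad=True)
    x = torch.randn(2, 96, 8, 8)

    loss = F.conv2d(x, w, b).sum()

    grad_w = torch.autograd.grad(outputs=loss, inputs=w, retain_graph=True)
    print(type(grad_w), grad_w[0].shape)  # <class 'tuple'> torch.Size([128, 96, 5, 5])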
Moreover, when I do `cont_loss_weight = torch.autograd.grad(outputs=cont_loss, inputs=weight, retain_graph=True)`, I am guessing grad_weight in backward will get affected. I also have to keep retain_graph=True.
So to avoid that, when I used a copy of the parameters instead, i.e. `cont_loss_weight = torch.autograd.grad(outputs=cont_loss, inputs=weight1, retain_graph=True)`, I got this error:
RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
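A minimal repro of the same error outside my layer (names are only illustrative; my guess is that the clone is never used in the graph that produced the loss):

    import torch
    import torch.nn.functional as F

    w = torch.randn(128, 96, 5, 5, requires_grad=True)
    x = torch.randn(2, 96, 8, 8)

    loss = F.conv2d(x, w).sum()          # graph is built with w
    w1 = w.clone().requires_grad_(True)  # copy of the parameter, not used in that graph

    # raises: RuntimeError: One of the differentiated Tensors appears to not
    # have been used in the graph. Set allow_unused=True if this is the desired behavior.
    torch.autograd.grad(outputs=loss, inputs=w1, retain_graph=True)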