Code for the loss function:
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda")  # the notebook runs on a Colab GPU

class ProxyNCA(torch.nn.Module):
    def __init__(self, batch_num, sz_embed):
        torch.nn.Module.__init__(self)
        # one learnable proxy vector per class
        self.proxies = torch.nn.Parameter(torch.randn(batch_num, sz_embed) / 8)
        # self.proxies = torch.nn.Parameter(torch.FloatTensor([[4, 5, 6], [4, 5, 6]]))
        # answer for this test case is 0.0025

    def pairwise_distance(self, a, b):
        # elementwise squared difference between embeddings and proxies
        return torch.sub(a, b).pow(2)

    def proxyNCAloss(self, X, P):
        pdist = nn.PairwiseDistance(p=2, keepdim=True)  # unused
        nume_exp = self.pairwise_distance(X, P)
        vect_denom = self.pairwise_distance_self(X)
        denom = (torch.exp(-1 * vect_denom)).sum(-1)
        nume_exp = torch.exp(-1 * nume_exp)
        proxy_nca_loss = (nume_exp.cuda() / denom.cuda())  # first error raised here
        return proxy_nca_loss.sum()

    def pairwise_distance_self(self, X):
        final_tensor = X
        # note: torch.zeros without a device argument allocates on the CPU
        processed = torch.zeros([X.size()[0], X.size()[1]])
        for i in range(0, X.size()[1]):
            for j in range(0, X.size()[1]):
                if i == j:
                    continue
                processed[0, i] = torch.add(
                    torch.sub(final_tensor[0, i], final_tensor[0, j]).pow(2),
                    processed[0, i])
        return processed

    def forwardold(self, X):
        P = self.proxies.double()
        P = 3 * F.normalize(P, p=2, dim=-1)
        # X = 3 * F.normalize(X, p=2, dim=-1)
        nume_exp = self.pairwise_distance(X, P)
        vect_denom = self.pairwise_distance_self(X)
        denom = (torch.exp(-1 * vect_denom)).sum()
        # proxy_nca_loss = -1 * torch.log((torch.exp(-1 * nume_exp) / denom).sum())
        proxy_nca_loss = (torch.exp(-1 * nume_exp) / denom).sum()
        return proxy_nca_loss

    def forward(self, X):
        P = self.proxies
        P = 3 * F.normalize(P, p=2, dim=-1)
        X = 3 * F.normalize(X, p=2, dim=-1)
        batchwise_loss = torch.zeros_like(X)
        if X.size()[0] == 1:
            return self.forwardold(X)
        for i in range(0, X.size()[0]):
            # treat each embedding and its proxy as a (1, d) row
            batchwise_loss[i] = self.proxyNCAloss(
                X[i].resize(X[i].size()[0], 1).t(),
                P[i].resize(P[i].size()[0], 1).t()).to(device)
        # sum the losses within a batch, then take the mean over the batch
        # as the representative loss
        return batchwise_loss.sum(-1).mean()
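
For reference, `pairwise_distance_self` builds the NCA-style denominator, the sum of exp(-d(x_i, x_j)) over j != i, but the double loop only ever writes row 0 and allocates `processed` without a `device` argument, so it always lives on the CPU even when `X` is on CUDA. A minimal vectorized sketch of the same computation that stays on the input's device (assuming `X` is a `(1, d)` row, which is what `forward` passes in):

import torch

def pairwise_distance_self_vec(X):
    # X: (1, d) row of embedding entries, as produced in forward()
    diff = X.unsqueeze(2) - X.unsqueeze(1)  # (1, d, d): diff[0, i, j] = x_i - x_j
    # zero out the i == j diagonal before summing over j, matching the loop
    mask = 1.0 - torch.eye(X.size(1), device=X.device, dtype=X.dtype)
    return (diff.pow(2) * mask).sum(-1)     # (1, d), on the same device as X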
First error:
RuntimeError Traceback (most recent call last)
<ipython-input-22-0db6b9a7fe1d> in <module>()
143
144 for epoch in range(2):
--> 145 train(epoch)
146 # try:
147 # data, target = next(dataloader_iterator)
2 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
91 Variable._execution_engine.run_backward(
92 tensors, grad_tensors, retain_graph, create_graph,
---> 93 allow_unreachable=True) # allow_unreachable flag
94
95
RuntimeError: Function AddBackward0 returned an invalid gradient at index 1 - expected type torch.FloatTensor but got torch.cuda.FloatTensor
I am simply passing the CIFAR dataset through a ResNet that is deployed on CUDA, so its output is on CUDA as well. But when I pass that output to my loss function, I get two kinds of errors. The first is posted above: it occurs when the line `proxy_nca_loss = (nume_exp.cuda() / denom.cuda())` in `proxyNCAloss(self, X, P)` has all variables moved to CUDA. When I remove `.cuda()` from `denom` in that statement, I get the following error (my guess at the underlying device problem is sketched after the traceback).
Second error:
RuntimeError Traceback (most recent call last)
<ipython-input-10-0db6b9a7fe1d> in <module>()
143
144 for epoch in range(2):
--> 145 train(epoch)
146 # try:
147 # data, target = next(dataloader_iterator)
3 frames
<ipython-input-5-b540c683e63a> in proxyNCAloss(self, X, P)
23
24
---> 25 proxy_nca_loss= (nume_exp.cuda()/denom)
26
27 return proxy_nca_loss.sum()
RuntimeError: expected backend CUDA and dtype Float but got backend CPU and dtype Float
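
My current guess is that both errors come from the same place: `processed` in `pairwise_distance_self` is created on the CPU, so `denom` ends up on a different device than `nume_exp`, and the backward pass then sees mixed CPU/CUDA tensors inside the graph. A sketch of what I think the device handling should look like (`batch_num`, `sz_embed` and the input below are placeholders, not the notebook's exact values):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# .to(device) on the module moves the self.proxies Parameter as well
criterion = ProxyNCA(batch_num=10, sz_embed=64).to(device)
X = torch.randn(10, 64, device=device)  # stand-in for the ResNet embeddings

# inside pairwise_distance_self, allocating on the input's device
# (instead of the CPU default) should remove the mismatch:
#     processed = torch.zeros(X.size(0), X.size(1),
#                             device=X.device, dtype=X.dtype)

loss = criterion(X)  # with that allocation fix, no .cuda() calls are needed
loss.backward()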
You can access the full code here: https://colab.research.google.com/drive/1ZB90nQl1QplPOmI9EDFi1LFD0aoK2WwA
Looking forward to positive, helpful responses. You can edit the notebook as well.