Hi all, I am implementing the Cluster Loss described in https://arxiv.org/abs/1812.10325, but I am stuck on a problem: calling backward() does not update anything, and the loss value stays constant from step to step. I know the problem is somewhere in the autograd graph. Below is my implementation of the loss, and after the code a short illustration of where the graph breaks plus what I found.
import torch
import torch.nn as nn
import losses  # project helper that provides euclidean_distance(feats, means)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class ClusterLoss(nn.Module):
    def __init__(self, alpha=0.2):
        super(ClusterLoss, self).__init__()
        self.alpha = alpha
        self.ranking_loss = nn.SoftMarginLoss()
        # running per-class statistics, filled in by mean_feats()
        self.clusters_sum = []     # per-class feature sums
        self.clusters_labels = []  # class labels (plain ints)
        self.clusters_count = []   # per-class sample counts
    def forward(self, feats, targets):
        # requires mean_feats() and mean_feats_compute() to have been called first,
        # so that self.M_emb holds the current class means
        t_intra, D_intra = self.Euclidean_intra(feats, targets)
        t_inter, D_inter = self.Euclidean_inter(targets)
        Y = torch.ones_like(D_intra)  # one target per identity, same shape/device as D_intra
        # note: with Y = 1, SoftMarginLoss pushes its argument positive, i.e. toward
        # D_intra > D_inter; if the goal is D_intra + alpha < D_inter, the argument
        # should probably be (D_inter - D_intra - alpha) instead
        loss = self.ranking_loss((D_intra - D_inter) + self.alpha, Y)
        return loss
    def mean_feats(self, feats, targets):
        # Accumulate per-class feature sums and sample counts over the batch.
        N = feats.size(0)
        # shape [N, N]: is_pos[i, j] is True iff samples i and j share a label
        is_pos = targets.expand(N, N).eq(targets.expand(N, N).t())
        target_batch = []
        for i in range(N):
            t = targets[i].item()  # use plain ints as labels so list lookups work
            if t not in target_batch:
                a = feats[is_pos[:, i], :]  # features of all samples of this identity
                sum_a = torch.sum(a, dim=0)  # note: keeps autograd history across batches
                if t in self.clusters_labels:
                    j = self.clusters_labels.index(t)
                    self.clusters_sum[j] += sum_a
                    self.clusters_count[j] += a.size(0)
                else:  # first time this identity is seen
                    self.clusters_sum.append(sum_a)
                    self.clusters_labels.append(t)
                    self.clusters_count.append(a.size(0))
                target_batch.append(t)
    def mean_feats_compute(self):
        # Turn the accumulated sums and counts into per-class mean embeddings.
        clusters_sum = torch.stack(self.clusters_sum).to(device)       # [K, d]
        clusters_count = torch.Tensor(self.clusters_count).to(device)  # [K]
        self.M_emb = clusters_sum / clusters_count.unsqueeze(1)        # [K, d] class means
    def Euclidean_intra(self, feats, targets):
        # For each identity in the batch: max distance between its samples and its class mean.
        M_intra = self.M_emb
        D = losses.euclidean_distance(feats, M_intra)  # [N, K] distance matrix
        N = feats.size(0)
        is_pos = targets.expand(N, N).eq(targets.expand(N, N).t())
        target_intra = []
        D_intra = []
        for i in range(N):
            if targets[i].item() not in target_intra:
                D_id = D[is_pos[:, i], :]
                target_intra.append(targets[i].item())
                index_mean = self.clusters_labels.index(targets[i].item())
                # BUG (original): collecting D_id[j, index_mean] into a plain Python
                # list (i_mean_dist) and rebuilding it with torch.Tensor() detached
                # the distances from the graph; direct indexing keeps them attached
                D_intra.append(torch.max(D_id[:, index_mean]))
        D_intra = torch.stack(D_intra).to(device)  # stack, NOT torch.Tensor(...)
        return target_intra, D_intra
    def Euclidean_inter(self, targets):
        # For each identity in the batch: min distance between its class mean and the other class means.
        M_intra = self.M_emb
        N = targets.size(0)
        target_inter = []
        D_inter = []
        for i in range(N):
            if targets[i].item() not in target_inter:
                index_mean = self.clusters_labels.index(targets[i].item())
                M = M_intra[index_mean, :]
                target_inter.append(targets[i].item())
                list_inter = []
                for j in range(len(M_intra)):
                    if j != index_mean:
                        X = M_intra[j, :]
                        # unsqueeze(0), not unsqueeze(1): pairwise_distance compares
                        # rows, so each mean must be a single [1, d] row vector
                        list_inter.append(torch.pairwise_distance(M.unsqueeze(0), X.unsqueeze(0), 2))
                D_inter.append(torch.min(torch.stack(list_inter)))
        # BUG (original): torch.Tensor(D_inter) created a brand-new leaf tensor, and
        # requires_grad_() only gave that leaf a grad slot with no path back to the
        # network; torch.stack keeps the distances attached to the graph
        D_inter = torch.stack(D_inter).to(device)
        return target_inter, D_inter
    def class_mean_emb(self, targets):
        # Return the mean embeddings and labels for the identities in targets, without duplicates.
        N = self.M_emb.size(0)
        targets_mean = []
        mean_emb = []
        for i in range(N):
            if targets[i].item() not in targets_mean:
                targets_mean.append(targets[i].item())
                mean_emb.append(self.M_emb[i, :])
        return torch.stack(mean_emb), targets_mean
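To see where the graph breaks, here is a minimal standalone sketch (my own illustration, not from the paper) comparing the two ways of turning a list of computed distances into a tensor: rebuilding with torch.Tensor() copies the values into a fresh leaf, while torch.stack keeps them attached to the graph.

import torch

x = torch.randn(4, 8, requires_grad=True)
dists = [row.norm(p=2) for row in x]            # every entry has a grad_fn

bad = torch.Tensor([d.item() for d in dists])   # copies values -> brand-new leaf
print(bad.grad_fn)                              # None: backward() stops here
bad.requires_grad_()                            # gives bad its own grad slot,
                                                # but there is still no path back to x

good = torch.stack(dists)                       # stays on the graph
print(good.grad_fn)                             # <StackBackward0 ...>
good.sum().backward()
print(x.grad is not None)                       # True: gradients reach x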
In Euclidean_intra I had collected the per-sample distances into a plain Python list (i_mean_dist = []) and rebuilt it with torch.Tensor(), which detached it from the autograd graph, so backward() could not update the weights; the corrected version above indexes D_id directly and uses torch.stack. The same torch.Tensor(...) pattern was in Euclidean_inter as well (which is why requires_grad_() on D_inter did not help), and it is fixed the same way in the code above.
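As a quick sanity check after the fix (just a sketch; the dummy shapes and the call order mean_feats -> mean_feats_compute -> forward are my assumptions about how the class is driven, and it assumes your losses.euclidean_distance helper is importable):

criterion = ClusterLoss(alpha=0.2)

feats = torch.randn(8, 128, device=device, requires_grad=True)  # dummy embeddings
targets = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3], device=device)

criterion.mean_feats(feats, targets)   # accumulate per-class sums and counts
criterion.mean_feats_compute()         # build self.M_emb from them
loss = criterion(feats, targets)
loss.backward()
print(loss.item(), feats.grad.abs().sum().item())  # non-zero grad => graph intact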