Hi,
I have a custom loss function, but when I train my model with it, the model's parameters do not change. When I use the built-in cross-entropy loss instead, they do change. I can't figure out where the problem is. Could someone help me out?
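To be concrete, this is the kind of check I use to see whether the weights actually move after one optimizer step (a minimal sketch with a stand-in linear layer and the built-in loss, not my real model or data):

import torch
import torch.nn as nn

net = nn.Linear(16, 3)                        # stand-in for my real model
opt = torch.optim.SGD(net.parameters(), lr=0.1)
x, y = torch.randn(4, 16), torch.tensor([0, 1, 2, 0])

before = {n: p.detach().clone() for n, p in net.named_parameters()}
loss = nn.CrossEntropyLoss()(net(x), y)       # I swap in my custom loss here
opt.zero_grad()
loss.backward()
opt.step()

changed = [n for n, p in net.named_parameters()
           if not torch.equal(before[n], p.detach())]
print('parameters that changed after one step:', changed)

On my real model this list is non-empty with cross-entropy, but stays empty with my custom loss.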
This is my loss function:
import torch
import torch.nn as nn


class SkNN_loss(nn.Module):
    def __init__(self):
        super(SkNN_loss, self).__init__()

    def pairwise_euclid_distance(self, A, B):
        """Euclidean distance between two feature tensors.

        :param A: a tensor.
        :param B: a tensor.
        :returns: a tensor with the Euclidean distance between A and B.
        """
        dist = A - B
        euc_dist = torch.norm(dist, p=2)
        return euc_dist

    def pairwise_cos_distance(self, A, B):
        """Cosine distance between two feature tensors.

        :param A: a tensor.
        :param B: a tensor.
        :returns: a tensor with the cosine distance between A and B.
        """
        # l2-normalize both vectors
        norm_A = A.norm(p=2, keepdim=True)
        norm_B = B.norm(p=2, keepdim=True)
        A = A.div(norm_A)
        B = B.div(norm_B)  # fixed typo: was B = A.div(norm_B)
        prod = torch.mul(A, B)
        return 1 - prod

    def forward(self, features, labels, T):
        b = len(features)
        neighbors = 0
        total = 0
        l_sn = 0
        for i, x_i in enumerate(features):
            for j, x_j in enumerate(features):
                if i != j:
                    dist = self.pairwise_euclid_distance(x_i, x_j)
                    dist = -(dist / T)
                    dist = torch.exp(dist)
                    if labels[i] == labels[j]:
                        neighbors += dist   # same-class contributions for anchor i
                    total += dist           # contributions from all other samples for anchor i
            if neighbors != 0:
                l_sn += torch.log(neighbors / total)
            neighbors = 0
            total = 0
        l_sn = l_sn * (-1 / b)
        return l_sn
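As a sanity check, the loss can be run on random features in isolation to confirm it backpropagates at all (the shapes and labels below are just placeholders, not my real feature dimensions):

import torch

feats = torch.randn(6, 32, requires_grad=True)   # placeholder feature batch
labels = torch.tensor([0, 0, 1, 1, 2, 2])
loss = SkNN_loss()(feats, labels, 100)            # T = 100, same value as in my training loop
loss.backward()
print(loss.item())
print(feats.grad.abs().max())                     # a non-zero grad here means the loss reaches the features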
And this is my training function:
def train_test(self, model, criterion, epoch, phase, optimizer, args, logger, use_gpu):
    # Set dataset loader
    if phase == 'train':
        dataset = self.train_loader
    elif phase == 'test':
        dataset = self.test_loader
    else:
        dataset = self.val_loader
    # Set model mode
    if phase == 'test' or phase == 'val':
        model.eval()
    if phase == 'train':
        model.train()
    logger.info('-' * 10)
    logger.info('Epoch {}/{}'.format(epoch, args.epochs - 1))
    logger.info('Current Learning rate: {}'.format(showLR(optimizer)))
    Temperature = 100
    running_loss = 0
    running_corrects = 0
    running_all = 0
    top_k = 0
    model_loss = model.sknn_loss
    for batch_idx, (inputs, targets) in enumerate(dataset):
        since = time.time()
        loss_CEntropy_array = []
        loss_cluster_array = []
        inputs = inputs.float().permute(0, 2, 1, 3, 4)
        if use_gpu:
            model.to(device)
            inputs, targets = inputs.to(device), targets.to(device)
        if phase != 'train':
            with torch.no_grad():
                outputs, features = model(inputs)
        else:
            outputs, features = model(inputs)
        output_tmp = F.softmax(outputs, dim=1).data
        _, preds = torch.max(output_tmp, 1)
        _, top_k_index = torch.topk(output_tmp, self.k)
        # print('!!!!!!!!!!!!!!!!!!!!!!!', preds, targets, outputs)
        loss_CEntropy = criterion(outputs, targets)
        loss_kNN = model_loss(features, targets, Temperature)
        print(loss_kNN.grad, loss_kNN)
        loss_CEntropy_array.append(self.cross_entr_coef * loss_CEntropy.item())
        loss_cluster_array.append(self.kNN_coef * loss_kNN.item())
        # L2 Regularization
        l2 = 0
        for W in model.parameters():
            # l2 += torch.norm(W, p=2)
            # print(W.requires_grad)
            pass
        final_loss = self.kNN_coef * loss_kNN + self.cross_entr_coef * loss_CEntropy
        if phase == 'train':
            print(W)
            # print(list(model.parameters())[0].grad)
            optimizer.zero_grad()
            final_loss.backward()
            optimizer.step()
        running_loss += final_loss.item()
        for jj in range(len(preds)):
            if preds[jj] == targets.data[jj]:
                running_corrects += 1
            if targets.data[jj] in top_k_index[jj]:
                top_k += 1
        running_all += len(inputs)
        # Write info in logger
        cost_time = time.time() - since
        logger.info(
            'Process: [{:5.0f}/{:5.0f} ({:.0f}%), (running_corrects/running_all): ({}/{})]\t'
            'Top-k: {}, Top-2 acc: {}\n'
            'Loss: {:.4f}\tAcc:{:.4f}\n'
            'Mean of Cross Entropy: {:.4f}, Mean of Cluster Entropy: {:.4f}\n'
            'Cost time:{:5.0f}s\tRemaining time for the epoch:{:5.0f}s\r'.format(
                running_all,
                len(dataset.dataset),
                100. * batch_idx / len(dataset),
                running_corrects,
                running_all,
                top_k,
                top_k / running_all,
                running_loss / (batch_idx + 1),
                running_corrects / running_all,
                np.mean(loss_CEntropy_array),
                np.mean(loss_cluster_array),
                cost_time,
                cost_time * (len(dataset) - batch_idx)
            ))
        if batch_idx % args.interval == 0 or (batch_idx == len(dataset) - 1):
            cost_time = time.time() - since
            print(
                'Process: [{:5.0f}/{:5.0f} ({:.0f}%), (running_corrects/running_all): ({}/{})]\t'
                'Top-k: {}, Top-k acc: {}, '
                'Loss: {:.4f}\tAcc:{:.4f}\tCost time:{:5.0f}s\tRemaining time for the epoch:{:5.0f}s\r'.format(
                    running_all,
                    len(dataset.dataset),
                    100. * batch_idx / len(dataset),
                    running_corrects,
                    running_all,
                    top_k,
                    top_k / running_all,
                    running_loss / (batch_idx + 1),
                    running_corrects / running_all,
                    cost_time,
                    cost_time * (len(dataset) - batch_idx)
                ))
    final_loss = running_loss / (batch_idx + 1)
    acc = running_corrects / len(dataset.dataset)
    logger.info('*******************************Final Epoch Results*******************************')
    logger.info('{} Epoch:\t{:2}\tTotal Loss: {:.4f}\tAcc:{:.4f}'.format(
        phase,
        epoch,
        final_loss,
        acc
    ))
    logger.info(
        '****************************************************************************************************************************')
    if phase == 'train':
        torch.save(model.state_dict(), args.save_path + '/' + args.mode + '_' + str(epoch + 1) + '.pt')
        return model, final_loss, acc
    else:
        return final_loss, acc
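For completeness, this is roughly how I call it each epoch (trainer, optimizer, args, and logger are placeholders for my actual objects, so this is only a sketch):

criterion = nn.CrossEntropyLoss()
for epoch in range(args.epochs):
    # the 'train' phase returns the model plus loss/accuracy; other phases return only loss/accuracy
    model, train_loss, train_acc = trainer.train_test(
        model, criterion, epoch, 'train', optimizer, args, logger, use_gpu=True)
    val_loss, val_acc = trainer.train_test(
        model, criterion, epoch, 'val', optimizer, args, logger, use_gpu=True)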