Hello. I am using a loss function BCE Loss for Siamese Network. The inputs to loss function are target (y_true
) and input (dy
) in my case. 'dy'
is the pairwise distance between output embeddings produced from siamese network and 'y_true'
are the true labels.
The true labels are between 0 and 1, and dy is also between 0 and 1.
When I use the above loss function, it gives me the error:
ValueError: Using a target size (torch.Size([2, 1])) that is different to the input size (torch.Size([2])) is deprecated. Please ensure they have the same size.
The following code is for train_epoch function:
def train_epoch(train_loader, model, loss_fn, optimizer, cuda, log_interval, metrics):
for metric in metrics:
metric.reset()
model.train()
losses = []
total_loss = 0
for batch_idx, ((x0, x1), y) in enumerate(train_loader):
x0, x1, y_true = x0.cpu(), x1.cpu(), y.cpu()
#y_true = (y_true, (2,))
optimizer.zero_grad()
output1, output2 = model(x0, x1)
#output1 = np.reshape(output1, (2,1))
#output2 = np.reshape(output2, (2,1))
#y_true = np.reshape(y_true, (2))
#print('output1 is:', output1)
#print('output2 is:', output2)
#print('y_true is', y_true)
'''Distance metric - PairwiseDistance'''
p_dist = torch.nn.PairwiseDistance(keepdim=True)
dy = p_dist(output1, output2)
#dy = torch.reshape(dy, (2,1))
#print('dy_shape after squeeze', dy.size())
dy = torch.nan_to_num(dy)
#y_true = torch.reshape(y_true, (2,))
#print('y_true_shape after squeeze', y_true.size())
y_true = torch.nan_to_num(y_true)
#print('dy is:', dy)
'''2 lines indicated the normalization of dy to 0 and 1 by dividing it with max value'''
maximum_dy = torch.max(dy)
maximum_dy = torch.nan_to_num(maximum_dy)
#print('maximum dy is', maximum_dy )
dy = dy/maximum_dy
#print('dy after max is', dy)
#print('y_true is', y_true)
maximum_y_true = torch.max(y_true)
maximum_y_true = torch.nan_to_num(maximum_y_true)
#print('maximum y_true is', maximum_y_true)
y_true = y_true/maximum_y_true
#print(' y_true after max is', y_true)
#print('dy_shape:', dy.size(), 'y_true_shape:', y_true.size())
#print('dy_shape after squeeze', dy.size())
dy = torch.unsqueeze(dy,1)
y_true = torch.unsqueeze(y_true, 1)
loss = loss_fn(dy, y_true)
loss.backward()
optimizer.step()
losses.append(loss.item())
total_loss += loss.item()
for metric in metrics:
metric(dy, y_true)
for metric in metrics:
metric.total = (batch_idx+1)*y_true.shape[0]
if batch_idx % log_interval == 0:
message = 'Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
batch_idx, len(train_loader),
100. * batch_idx / len(train_loader), np.mean(losses))
for metric in metrics:
message += '\t{}: {}'.format(metric.name(), metric.value())
print(message)
losses = []
total_loss /= (batch_idx + 1)
return total_loss, metrics
Following code is where the loss function is called:
loss_fn = torch.nn.BCELoss()
lr = 1e-3
optimizer = optim.SGD(model.parameters(), lr=lr)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = 2
log_interval = 1
I would appreciate some help here @ptrblck
Thanks in advance.