When I add the `break` in the inner for loop, I expect to get the loss for only one batch. I'm not sure why I get 10 different lines with different loss values.
# Train for `num_epochs` epochs, but process only the FIRST batch of each
# epoch: the `break` below exits the inner batch loop immediately.
#
# NOTE: `break` only leaves the inner `for` — the outer epoch loop still
# runs `num_epochs` times. That is why ten loss lines are printed for
# num_epochs == 10 (one per epoch), each with a different value, because
# optimizer.step() updates the parameters between epochs.
loss_list = []
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0  # was `l`: renamed to avoid shadowing the comprehension var and the ambiguous glyph
    for i, values in enumerate(train_iter):
        # Move every tensor of the batch to the target device.
        # (With more than one GPU you would scatter across devices here.)
        input_data = [v.to(device) for v in values]
        # Positive / negative predictions; the last two entries of
        # input_data are assumed to be the positive and negative
        # targets respectively — TODO confirm against the dataset.
        p_pos = [model(*t) for t in zip(*input_data[0:-1])]
        p_neg = [model(*t) for t in zip(*input_data[0:-2], input_data[-1])]
        batch_losses = [loss(p, n) for p, n in zip(p_pos, p_neg)]
        optimizer.zero_grad()
        # Plain loop for the side-effecting backward() calls — a list
        # comprehension built a throwaway list and shadowed `l`.
        for batch_loss in batch_losses:
            batch_loss.backward(retain_graph=False)
        # Normalize by the number of devices if you use more than one GPU.
        epoch_loss += sum(batch_losses)
        optimizer.step()
        print(i, epoch_loss)
        break  # only one batch per epoch; remove to train on the full loader
    loss_list.append(epoch_loss)
Here are the results:
0 tensor([[572.0556]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[560.5846]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[564.1666]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[562.2694]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[546.3769]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[564.4094]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[552.5000]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[537.7695]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[540.4388]], device='cuda:0', grad_fn=<AddBackward0>)
0 tensor([[543.6255]], device='cuda:0', grad_fn=<AddBackward0>)