I got a piece of code from:
def train(train_loader, model, optimizer, epoch):
    """Run one epoch of joint triplet + classification training.

    For every (anchor, positive, negative) batch the embeddings are computed,
    the triplets that violate the margin (``d_n - d_p < args.margin``) are
    selected, and the model is updated on the sum of the triplet loss and a
    cross-entropy loss over the selected images.  After the last batch the
    distances collected from the selected triplets are passed to
    ``evaluate``, a ROC curve is plotted and a checkpoint is saved.

    Relies on names defined elsewhere in the file: ``args``, ``l2_dist``,
    ``TripletMarginLoss``, ``adjust_learning_rate``, ``logger``, ``evaluate``,
    ``plot_roc`` and ``LOG_DIR``.

    Args:
        train_loader: iterable yielding
            ``(data_a, data_p, data_n, label_p, label_n)`` batches; its
            ``dataset`` attribute and ``len()`` are used for progress output.
        model: network that is callable (embedding) and provides
            ``forward_classifier``.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used in log output and file names.
    """
    # switch to train mode
    model.train()
    pbar = tqdm(enumerate(train_loader))
    labels, distances = [], []
    # The criterion holds no per-batch state, so build it once per epoch.
    criterion = nn.CrossEntropyLoss()

    for batch_idx, (data_a, data_p, data_n, label_p, label_n) in pbar:
        data_a, data_p, data_n = data_a.cuda(), data_p.cuda(), data_n.cuda()
        data_a, data_p, data_n = Variable(data_a), Variable(data_p), Variable(data_n)

        # compute output
        out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)

        # Choose the hard negatives: triplets whose negative is not at least
        # `margin` farther from the anchor than the positive.
        d_p = l2_dist.forward(out_a, out_p)
        d_n = l2_dist.forward(out_a, out_n)
        violates_margin = (d_n - d_p < args.margin).cpu().data.numpy().flatten()
        hard_triplets = np.where(violates_margin == 1)
        if len(hard_triplets[0]) == 0:
            # Nothing to train on.  Drop the references now so the autograd
            # graph of this batch can be freed before the next forward pass
            # instead of staying alive alongside it.
            del out_a, out_p, out_n, d_p, d_n
            continue

        # Select the hard rows directly on the GPU.  Unlike a round trip
        # through numpy, index_select keeps the selected embeddings attached
        # to the autograd graph, so the triplet loss really contributes
        # gradients and loss.backward() releases the buffers of the embedding
        # forward pass.
        hard_idx = Variable(torch.from_numpy(hard_triplets[0]).cuda())
        out_selected_a = out_a.index_select(0, hard_idx)
        out_selected_p = out_p.index_select(0, hard_idx)
        out_selected_n = out_n.index_select(0, hard_idx)

        selected_data_a = data_a.index_select(0, hard_idx)
        selected_data_p = data_p.index_select(0, hard_idx)
        selected_data_n = data_n.index_select(0, hard_idx)

        selected_label_p = torch.from_numpy(label_p.cpu().numpy()[hard_triplets])
        selected_label_n = torch.from_numpy(label_n.cpu().numpy()[hard_triplets])

        triplet_loss = TripletMarginLoss(args.margin).forward(
            out_selected_a, out_selected_p, out_selected_n)

        cls_a = model.forward_classifier(selected_data_a)
        cls_p = model.forward_classifier(selected_data_p)
        cls_n = model.forward_classifier(selected_data_n)

        predicted_labels = torch.cat([cls_a, cls_p, cls_n])
        # The positive's label is used as the target for the anchor as well.
        true_labels = torch.cat([Variable(selected_label_p.cuda()),
                                 Variable(selected_label_p.cuda()),
                                 Variable(selected_label_n.cuda())])

        cross_entropy_loss = criterion(predicted_labels.cuda(), true_labels.cuda())

        loss = cross_entropy_loss + triplet_loss

        # compute gradient and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update the optimizer learning rate
        adjust_learning_rate(optimizer)

        # log loss value
        # NOTE(review): `.data[0]` is the pre-0.4 PyTorch way of reading a
        # scalar loss; newer releases need `.item()` instead -- confirm the
        # PyTorch version in use before changing it.
        logger.log_value('triplet_loss', triplet_loss.data[0]).step()
        logger.log_value('cross_entropy_loss', cross_entropy_loss.data[0]).step()
        logger.log_value('total_loss', loss.data[0]).step()
        if batch_idx % args.log_interval == 0:
            pbar.set_description(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} \t # of Selected Triplets: {}'.format(
                    epoch, batch_idx * len(data_a), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.data[0], len(hard_triplets[0])))

        # Collect anchor-negative distances (label 0) and anchor-positive
        # distances (label 1) of the selected triplets for the evaluation
        # that runs after the loop.
        dists = l2_dist.forward(out_selected_a, out_selected_n)
        distances.append(dists.data.cpu().numpy())
        labels.append(np.zeros(dists.size(0)))

        dists = l2_dist.forward(out_selected_a, out_selected_p)
        distances.append(dists.data.cpu().numpy())
        labels.append(np.ones(dists.size(0)))

    labels = np.array([sublabel for label in labels for sublabel in label])
    distances = np.array([subdist[0] for dist in distances for subdist in dist])

    tpr, fpr, accuracy, val, val_std, far = evaluate(distances, labels)
    print('\33[91mTrain set: Accuracy: {:.8f}\n\33[0m'.format(np.mean(accuracy)))
    logger.log_value('Train Accuracy', np.mean(accuracy))

    plot_roc(fpr, tpr, figure_name="roc_train_epoch_{}.png".format(epoch))

    # do checkpointing
    torch.save({'epoch': epoch + 1, 'state_dict': model.state_dict()},
               '{}/checkpoint_{}.pth'.format(LOG_DIR, epoch))
As you can see, there are several calls to model.forward(…):
out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)
....
cls_a = model.forward_classifier(selected_data_a)
cls_p = model.forward_classifier(selected_data_p)
cls_n = model.forward_classifier(selected_data_n)
(model.forward_classifier() also invokes model.forward().)
Because loss.backward() is called, a little GPU memory is released before the second iteration begins, but more than 30 GB is not (I have 4x 1080 Ti). Training then crashes because it runs out of GPU memory. So my questions are:
1. In an nn.Module, how (or where) is the computation graph saved, and how can I release the GPU memory?
2. Must I call backward()? If so, what should be in the .backward() method?
3. In my case, which part of the GPU memory is released and which part is kept? Is there any way to see this?