Hi, I am trying to create a memory bank that stores image features along with their labels. I am using 2 nodes, each equipped with 4 GPUs. I want this memory bank to be shared among all processes and to be updated during training. I have the following code snippet:
@torch.no_grad()
def eval_build_bank(model, data_loader, len_dataset, device, world_size):
    """Build a feature/pseudo-label memory bank that is identical on every rank.

    Each process fills the rows of the bank that correspond to the sample
    indices it sees in ``data_loader``; the per-rank partial banks are then
    merged with a SUM all-reduce so that every process ends up holding the
    complete bank.

    Args:
        model: Callable returning ``(logits, features)`` for an image batch.
            ``model.model.visual.classifier.shape[0]`` is used as the feature
            dimension of the bank.
        data_loader: Iterable yielding ``(inputs, _, idx)`` where
            ``inputs[0]`` is the image batch and ``idx`` holds the dataset
            indices of the samples in that batch.
        len_dataset: Number of rows to allocate in the bank.
        device: Device on which the bank is allocated and reduced.
        world_size: Unused; kept so existing callers keep working.

    Returns:
        dict with ``"features"`` (``len_dataset x feature_dim`` tensor) and
        ``"labels"`` (``len_dataset`` long tensor of argmax pseudo-labels).

    Note:
        This function does not switch the model to eval mode; the caller is
        expected to do so if that is required.
    """
    feat_dim = model.model.visual.classifier.shape[0]
    # Allocate directly on the target device instead of CPU-then-copy.
    features = torch.zeros(len_dataset, feat_dim, device=device)
    labels = torch.zeros(len_dataset, dtype=torch.long, device=device)
    # 1 for every row this rank has written. Needed because a sampler that
    # pads/replicates indices to balance ranks makes several ranks write the
    # same row, and a plain SUM would then double-count it.
    counts = torch.zeros(len_dataset, dtype=torch.long, device=device)

    for inputs, _, idx in data_loader:
        image = inputs[0].to(device)
        idx = torch.as_tensor(idx, dtype=torch.long, device=device)
        logit, feats = model(image)
        features[idx] = feats.to(features.dtype)
        # softmax is monotonic, so argmax over the logits gives the same
        # pseudo-label without the extra pass.
        labels[idx] = torch.argmax(logit, dim=-1)
        counts[idx] = 1

    # Merge exactly once, after the loop. Skipped when no process group is
    # running so the function also works in single-process / debug runs.
    if dist.is_available() and dist.is_initialized():
        dist.all_reduce(features, op=dist.ReduceOp.SUM)
        dist.all_reduce(labels, op=dist.ReduceOp.SUM)
        dist.all_reduce(counts, op=dist.ReduceOp.SUM)
        # Undo double counting of rows written by more than one rank.
        # NOTE(review): for labels this assumes every rank predicts the same
        # class for a duplicated sample (same weights, deterministic
        # forward) - confirm for your setup.
        denom = counts.clamp(min=1)
        features.div_(denom.unsqueeze(1).to(features.dtype))
        labels = torch.div(labels, denom, rounding_mode="floor")

    bank = {
        "features": features,
        "labels": labels,
    }
    return bank
Is this correct?