I need to evaluate my results on multiple GPUs. However, the input sizes vary, so my dataloader's batch size can only be 1. Therefore, I wrote my own implementation of multi-GPU evaluation. However, there is a problem: the tensors end up on different GPUs, even though I have set the model and the input data to the same device.
Some of the code is below:
class Worker(threading.Thread):
def __init__(self, gpu_idx, network, model, shred_list, results, lock, device=None):
super(Worker, self).__init__()
# self.val_func = val_func
torch.cuda.set_device(device)
self.network = network
self.model = model
self.gpu_idx = gpu_idx
self.shred = shred_list
self.results = results
self.lock = lock
self.device = device
# Thread entry point: evaluate this worker's shard of the dataset on its
# assigned GPU. NOTE(review): this depends on module-level globals `stride`,
# `dataset`, `config`, `load_model`, and `evaluation_single_image` that are
# not visible in this snippet — confirm they exist in the enclosing module.
# NOTE(review): the method appears truncated here (`self.results` and
# `self.lock` are never used in the visible lines).
def run(self):
for idx in self.shred:
# NOTE(review): `item_idx` — the global position of this sample — is
# computed but never used: `dataset[idx]` two lines below makes every
# worker read the SAME leading items of the dataset instead of its own
# shard. Presumably this should be `dataset[item_idx]`; this is the most
# likely cause of wrong/duplicated multi-GPU results. TODO confirm
# against the intended sharding scheme.
item_idx = self.gpu_idx * stride + idx
dd = dataset[idx]
img = dd['data']
# NOTE(review): `label` is unpacked but unused in the visible lines.
label = dd['label']
name = dd['fn']
# Make this worker's GPU the current device so tensors *allocated inside*
# the block land on the right card. NOTE(review): this does NOT move
# already-existing tensors — `img` comes straight from the dataset (most
# likely on CPU), so unless `evaluation_single_image` explicitly calls
# `.to(self.device)` on it, the model and input can still end up on
# different devices, matching the reported error. TODO confirm.
with torch.cuda.device(self.device):
# NOTE(review): `load_model` is invoked on EVERY loop iteration, i.e.
# the weights are re-loaded once per image. Hoisting this above the
# `for` loop (one load per worker) would remove most of the per-item
# overhead without changing results.
val_func = load_model(self.network, self.model, self.device)
# print(torch.cuda.device_of(self.network))
pred = evaluation_single_image(val_func, config.num_classes, img, config.image_mean, config.image_std,
config.eval_crop_size, config.eval_scale_array, config.eval_flip, device=self.device)