I want to train on multiple GPUs with nn.DataParallel. Sorry, I can only share part of the code:
# Excerpt: wrap both models in nn.DataParallel across GPUs 0 and 1, then run
# forward passes over the training data under autocast.
# NOTE: `model`, `origin_model`, `train_dataloader`, `nn`, `tqdm` and `amp`
# come from the part of the script that is not shown here.
device = "cuda:0"  # primary device; matches the first entry of device_ids below
model = nn.DataParallel(model, device_ids=[0, 1]).to(device)
origin_model = nn.DataParallel(origin_model, device_ids=[0, 1]).to(device)

for iepoch in range(5):
    # Progress bar labelled "QAT"; the displayed total is fixed at 1000.
    pbar = tqdm(train_dataloader, desc="QAT", total=1000)
    for ibatch, imgs in enumerate(pbar):
        # Move the batch to the primary device before calling the wrapper.
        imgs = imgs.to(device)
        with amp.autocast(enabled=True):
            # Forward pass only; the output is not used in this excerpt.
            model(imgs)
But I get the following error at the line model(imgs):
File "test_qat.py", line 195, in <module>
model(imgs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 168, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 178, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "/usr/local/lib/python3.6/dist-packages/torch/_utils.py", line 434, in reraise
raise exception
RuntimeError: Caught RuntimeError in replica 1 on device 1.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/GSOL_lossless_AI/yolov7_multiple_GPUs/models/yolo.py", line 599, in forward
return self.forward_once(x, profile) # single-scale inference, train
File "/GSOL_lossless_AI/yolov7_multiple_GPUs/models/yolo.py", line 625, in forward_once
x = m(x) # run
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1120, in _call_impl
result = forward_call(*input, **kwargs)
File "/GSOL_lossless_AI/yolov7_multiple_GPUs/models/common.py", line 111, in fuseforward
return self.act(self.conv(x))
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/quant_conv.py", line 120, in forward
quant_input, quant_weight = self._quant(input)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/quant_conv.py", line 85, in _quant
quant_input = self._input_quantizer(input)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/tensor_quantizer.py", line 346, in forward
outputs = self._quant_forward(inputs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/tensor_quantizer.py", line 310, in _quant_forward
outputs = fake_tensor_quant(inputs, amax, self._num_bits, self._unsigned, self._narrow_range)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/tensor_quant.py", line 306, in forward
outputs, scale = _tensor_quant(inputs, amax, num_bits, unsigned, narrow_range)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/tensor_quant.py", line 354, in _tensor_quant
outputs = torch.clamp((inputs * scale).round_(), min_bound, max_bound)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!
I followed the guide "How to Use Multiple GPUs in PyTorch" (Saturn Cloud Blog) and it works now. Thanks.