I can run my code on a single GPU, but it fails when I try to run it on multiple GPUs. I have pasted the error information below.
Traceback (most recent call last):
File "/home/guest_a/yss/mmseg_change1/tools/train.py", line 242, in <module>
main()
File "/home/guest_a/yss/mmseg_change1/tools/train.py", line 231, in main
train_segmentor(
File "/home/guest_a/yss/mmseg_change1/mmseg/apis/train.py", line 222, in train_segmentor
runner.run(data_loaders, cfg.workflow)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/iter_based_runner.py", line 134, in run
iter_runner(iter_loaders[i], **kwargs)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/iter_based_runner.py", line 67, in train
self.call_hook('after_train_iter')
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/base_runner.py", line 309, in call_hook
getattr(hook, fn_name)(self)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/hooks/optimizer.py", line 56, in after_train_iter
runner.outputs['loss'].backward()
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/torch/_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/torch/autograd/__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
Traceback (most recent call last):
File "/home/guest_a/yss/mmseg_change1/tools/train.py", line 242, in <module>
main()
File "/home/guest_a/yss/mmseg_change1/tools/train.py", line 231, in main
train_segmentor(
File "/home/guest_a/yss/mmseg_change1/mmseg/apis/train.py", line 222, in train_segmentor
runner.run(data_loaders, cfg.workflow)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/iter_based_runner.py", line 134, in run
iter_runner(iter_loaders[i], **kwargs)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/iter_based_runner.py", line 67, in train
self.call_hook('after_train_iter')
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/base_runner.py", line 309, in call_hook
getattr(hook, fn_name)(self)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/mmcv/runner/hooks/optimizer.py", line 56, in after_train_iter
runner.outputs['loss'].backward()
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/torch/_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/guest_a/anaconda3/envs/openmmlab_3.9.7/lib/python3.9/site-packages/torch/autograd/__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
Hi @ptrblck, I often see you helping other people here, and I have no idea what to do about this error. Could you please help me?