Please help me resolve the error below. My PyTorch version is '2.3.0+cu118', and the CUDA version on the DGX server is 11.5 (as reported by `nvcc --version`).
I am getting the following error:
Traceback (most recent call last):
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 306, in _lazy_init
queued_call()
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 174, in _check_capability
capability = get_device_capability(d)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 430, in get_device_capability
prop = get_device_properties(device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "../aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/aryan/FSCIL/FaceKD/train_test.py", line 397, in <module>
main(config)
File "/home/aryan/FSCIL/FaceKD/train_test.py", line 66, in main
base = BasePatchKD(config, loaders)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/aryan/FSCIL/FaceKD/pkd/core/base_patch_kd.py", line 54, in __init__
self._init_model()
File "/home/aryan/FSCIL/FaceKD/pkd/core/base_patch_kd.py", line 74, in _init_model
param.data = param.data.cuda()
^^^^^^^^^^^^^^^^^
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "../aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/home/aryan/FSCIL/FaceKD/train_test.py", line 5, in <module>
from pkd.utils import set_random_seed, time_now
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1310, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/aryan/FSCIL/FaceKD/pkd/__init__.py", line 3, in <module>
from pkd import core, data_loader, models, evaluation, utils, visualization, losses, operation
File "<frozen importlib._bootstrap>", line 1415, in _handle_fromlist
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/aryan/FSCIL/FaceKD/pkd/core/__init__.py", line 3, in <module>
from .lr_schedulers import WarmupMultiStepLR
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/aryan/FSCIL/FaceKD/pkd/core/lr_schedulers.py", line 1, in <module>
import torch
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/__init__.py", line 1478, in <module>
_C._initExtension(manager_path())
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 238, in <module>
_lazy_call(_check_capability)
File "/home/aryan/miniconda3/envs/facekd_new/lib/python3.12/site-packages/torch/cuda/__init__.py", line 235, in _lazy_call
_queued_calls.append((callable, traceback.format_stack()))
Note: `import torch` and `torch.cuda.is_available()` both work fine.