Hi !
I’m trying to build an object detection model. I tested it on the CPU and everything worked, but when I set my device to cuda, things started going wrong. First I got the error RuntimeError: CUDA error: device-side assert triggered, caused by the line image[0].to(device) (where image is a valid tensor obtained from image, target = next(iter(dataloader))). I got rid of this error by setting CUDA_LAUNCH_BLOCKING=1 and restarting the kernel.
Then I ran imgs = [im.to(device) for im in image]. That works, but when I evaluate imgs on its own (to display it), I get an error: RuntimeError: CUDA error: invalid argument
and more specifically:
RuntimeError Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~/anaconda3/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
375 if cls in self.type_pprinters:
376 # printer registered in self.type_pprinters
--> 377 return self.type_pprinters[cls](obj, self, cycle)
378 else:
379 # deferred printer
~/anaconda3/lib/python3.8/site-packages/IPython/lib/pretty.py in inner(obj, p, cycle)
553 p.text(',')
554 p.breakable()
--> 555 p.pretty(x)
556 if len(obj) == 1 and type(obj) is tuple:
557 # Special case for 1-item tuples.
~/anaconda3/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~/anaconda3/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~/anaconda3/lib/python3.8/site-packages/torch/tensor.py in __repr__(self)
191 return handle_torch_function(Tensor.__repr__, (self,), self)
192 # All strings are unicode in Python 3.
--> 193 return torch._tensor_str._str(self)
194
195 def backward(self, gradient=None, retain_graph=None, create_graph=False, inputs=None):
~/anaconda3/lib/python3.8/site-packages/torch/_tensor_str.py in _str(self)
381 def _str(self):
382 with torch.no_grad():
--> 383 return _str_intern(self)
~/anaconda3/lib/python3.8/site-packages/torch/_tensor_str.py in _str_intern(inp)
356 tensor_str = _tensor_str(self.to_dense(), indent)
357 else:
--> 358 tensor_str = _tensor_str(self, indent)
359
360 if self.layout != torch.strided:
~/anaconda3/lib/python3.8/site-packages/torch/_tensor_str.py in _tensor_str(self, indent)
240 return _tensor_str_with_formatter(self, indent, summarize, real_formatter, imag_formatter)
241 else:
--> 242 formatter = _Formatter(get_summarized_data(self) if summarize else self)
243 return _tensor_str_with_formatter(self, indent, summarize, formatter)
244
~/anaconda3/lib/python3.8/site-packages/torch/_tensor_str.py in __init__(self, tensor)
88
89 else:
---> 90 nonzero_finite_vals = torch.masked_select(tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0))
91
92 if nonzero_finite_vals.numel() == 0:
RuntimeError: CUDA error: invalid argument
And then, when I ran this line:
targets = [{k: v.to(device) for k, v in tgt.items()} for tgt in target]
I got this error:
--------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-27-6d1a7f1a9662> in <module>
----> 1 targets = [{k: v.to(device) for k, v in tgt.items()} for tgt in target]
<ipython-input-27-6d1a7f1a9662> in <listcomp>(.0)
----> 1 targets = [{k: v.to(device) for k, v in tgt.items()} for tgt in target]
<ipython-input-27-6d1a7f1a9662> in <dictcomp>(.0)
----> 1 targets = [{k: v.to(device) for k, v in tgt.items()} for tgt in target]
RuntimeError: CUDA error: invalid argument
Does anyone have an idea how to fix it?
Thanks!