# Payoff multiplier applied to the reservation (abstention) probability.
reward = 3


def custom_loss(outputs, target):
    """Return a scalar loss for logits whose last column is a reservation score.

    The logits are soft-maxed over dim 1, then split into the per-class
    probabilities (all columns but the last) and the reservation probability
    (the last column).  The loss is::

        -mean(log(p[target] + reservation / reward))

    NOTE(review): the snippet this was restored from had its loss computation
    stripped out (``loss`` was returned without ever being assigned).  The
    formula above is the gambler's-loss form implied by the ``gain``,
    ``reservation`` and ``reward`` names -- confirm it is the intended
    objective.

    Args:
        outputs: Raw logits of shape ``(batch, num_classes + 1)``.
        target: Integer (int64) class indices of shape ``(batch,)``; each
            value must lie in ``[0, num_classes)``.

    Returns:
        A 0-dim tensor holding the mean loss over the batch.

    Raises:
        IndexError: If any target index falls outside ``[0, num_classes)``.
    """
    probs = F.softmax(outputs, dim=1)
    class_probs, reservation = probs[:, :-1], probs[:, -1]

    # Validate labels up front.  On CUDA an out-of-range index passed to
    # torch.gather only surfaces later as an opaque "device-side assert
    # triggered" error, typically when the model's output layer has fewer
    # units than the labels require.
    num_classes = class_probs.size(1)
    if target.numel() > 0:
        lowest, highest = int(target.min()), int(target.max())
        if lowest < 0 or highest >= num_classes:
            raise IndexError(
                f"target indices must be in [0, {num_classes}), "
                f"got values in [{lowest}, {highest}]; check that the "
                f"model's output layer has num_classes + 1 units"
            )

    # squeeze(1) rather than squeeze(): keep the batch dimension even when the
    # batch holds a single sample.
    gain = torch.gather(class_probs, dim=1, index=target.unsqueeze(1)).squeeze(1)

    doubling_rate = torch.log(gain + reservation / reward)
    loss = -doubling_rate.mean()
    return loss
I am trying to implement a custom loss, but it was raising a CUDA runtime error, so I removed lines one at a time and tried to run the CNN again. With the lines removed it works fine. BUT
the moment I add
gain = torch.gather(outputs, dim=1, index=target.unsqueeze(1)).squeeze()
this line, it throws
RuntimeError: CUDA error: device-side assert triggered
ptrblck
December 18, 2020, 9:17am
2
The stack trace might point to an invalid index and you could rerun the code via:
CUDA_LAUNCH_BLOCKING=1 python script.py args
to see if this is indeed the line of code raising the error.
Also, if you run the code on the CPU you might get a better error message.
1 Like
ptrblck:
CUDA_LAUNCH_BLOCKING
I am using a Jupyter notebook, so to set it I have to use:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# according to this
So now the error is:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-43362cbf6c2c> in <module>
1 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
2
----> 3 history = train_all()
4 history
<ipython-input-17-3bc06434873b> in train_all(model)
40 outputs=model(images.float())
41 loss = custom_loss(outputs,labels)
---> 42 print(loss)
43
44 loss.backward()
~/anaconda3/lib/python3.7/site-packages/torch/tensor.py in __repr__(self)
177 return handle_torch_function(Tensor.__repr__, relevant_args, self)
178 # All strings are unicode in Python 3.
--> 179 return torch._tensor_str._str(self)
180
181 def backward(self, gradient=None, retain_graph=None, create_graph=False):
~/anaconda3/lib/python3.7/site-packages/torch/_tensor_str.py in _str(self)
370 def _str(self):
371 with torch.no_grad():
--> 372 return _str_intern(self)
~/anaconda3/lib/python3.7/site-packages/torch/_tensor_str.py in _str_intern(self)
350 tensor_str = _tensor_str(self.to_dense(), indent)
351 else:
--> 352 tensor_str = _tensor_str(self, indent)
353
354 if self.layout != torch.strided:
~/anaconda3/lib/python3.7/site-packages/torch/_tensor_str.py in _tensor_str(self, indent)
239 return _tensor_str_with_formatter(self, indent, summarize, real_formatter, imag_formatter)
240 else:
--> 241 formatter = _Formatter(get_summarized_data(self) if summarize else self)
242 return _tensor_str_with_formatter(self, indent, summarize, formatter)
243
~/anaconda3/lib/python3.7/site-packages/torch/_tensor_str.py in __init__(self, tensor)
87
88 else:
---> 89 nonzero_finite_vals = torch.masked_select(tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0))
90
91 if nonzero_finite_vals.numel() == 0:
RuntimeError: CUDA error: device-side assert triggered
Rajan_Lagah:
target.unsqueeze(1)
As you said.
I logged the value of each variable and found that target.unsqueeze(1)
contains values larger than the largest valid index.
I then checked and found that the output layer of my model was wrong.
Thank you.