My model ran fine, but after 89 epochs I got this error:
Traceback (most recent call last):
File "main.py", line 36, in <module>
main()
File "main.py", line 32, in main
method.method(settings, job_id)
File "/scratch/project_2002806/tranan11/AC-hybrid-transformer/processes/method.py", line 574, in method
nb_classes=nb_classes)
File "/scratch/project_2002806/tranan11/AC-hybrid-transformer/processes/method.py", line 328, in _do_training
optimizer=None)
File "/scratch/project_2002806/tranan11/AC-hybrid-transformer/tools/model.py", line 156, in module_epoch_passing
y_hat, y, f_names_tmp = module_forward_passing(example, module, use_y)
File "/scratch/project_2002806/tranan11/AC-hybrid-transformer/tools/model.py", line 217, in module_forward_passing
return module(x, None), y, f_names
File "/appl/soft/ai/miniconda3/envs/pytorch-1.3.1-1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/.../models/wavenet_rnn.py", line 94, in forward
return self._inference(x)
File "/.../models/wavenet_rnn.py", line 139, in _inference
self.max_length,
File "/.../modules/decode_utils.py", line 22, in greedy_decode
attention_mask = None
File "/appl/soft/ai/miniconda3/envs/pytorch-1.3.1-1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
result = self.forward(*input, **kwargs)
File "/..../AC-hybrid-transformer/modules/transformer.py", line 31, in forward
s_mask = subsequent_mask(word_embed.size(0)).to(device)
RuntimeError: CUDA error: device-side assert triggered
This is the code that triggers the error:
def subsequent_mask(sz):
    """Build a square additive mask of shape ``(sz, sz)``.

    Entry ``(i, j)`` is ``0.0`` when ``j <= i`` (on or below the main
    diagonal) and ``-inf`` when ``j > i`` (strictly above it).  The result
    is a float32 tensor on the default (CPU) device.
    """
    # Start from a matrix that is -inf everywhere ...
    blocked = torch.full((sz, sz), float('-inf'), dtype=torch.float32)
    # ... and keep only the part strictly above the diagonal; triu zeroes
    # every other entry, which yields the 0.0 entries.
    return torch.triu(blocked, diagonal=1)