Running the code with CUDA_LAUNCH_BLOCKING=1
gives the following output:
Namespace(batch_size=2, caption_path='/home/ashishverma/Documents/Codes/LSTM_eye_traj/data/train/', crop_size=224, embed_size=256, hidden_size=512, learning_rate=0.001, log_step=10, model_path='models/', num_epochs=5, num_layers=1, num_workers=2, save_step=1000, vocab_length=2552)
torch.Size([24, 256])
Traceback (most recent call last):
File "lstm_train.py", line 105, in <module>
main(args)
File "lstm_train.py", line 62, in main
outputs = decoder(features, captions, lengths)
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/ashishverma/Documents/Codes/LSTM_eye_traj/lstm_model.py", line 44, in forward
hiddens, _ = self.lstm(packed)
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 192, in forward
output, hidden = func(input, self.all_weights, hx, batch_sizes)
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 323, in forward
return func(input, *fargs, **fkwargs)
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 273, in forward
handle = cudnn.get_handle()
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 358, in get_handle
handle = CuDNNHandle()
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 172, in __init__
check_error(lib.cudnnCreate(ctypes.byref(ptr)))
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 345, in check_error
raise CuDNNError(status)
torch.backends.cudnn.CuDNNError: 2: b'CUDNN_STATUS_ALLOC_FAILED'
Exception ignored in: <bound method CuDNNHandle.__del__ of <torch.backends.cudnn.CuDNNHandle object at 0x7fb5f8bb84e0>>
Traceback (most recent call last):
File "/home/ashishverma/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 176, in __del__
check_error(lib.cudnnDestroy(self))
ctypes.ArgumentError: argument 1: <class 'TypeError'>: Don't know how to convert parameter 1