Hi everyone,
A GPU error occurred!
I am coding an encoder-decoder model.
Is there some way to solve my issue?
If you had the same problem and solved it, could you help me?
class Encoder_Decoder(nn.Module):
    """LSTM-cell encoder-decoder; only the encoder pass is implemented so far.

    Args:
        input_size: Number of rows in the source embedding table. Every
            source token index must lie in ``[0, input_size)``.
        output_size: Number of rows in the target embedding table and width
            of the output projection.
        hidden_size: Embedding width and LSTM state width.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super(Encoder_Decoder, self).__init__()
        # NOTE: PyTorch resolves a negative ``padding_idx`` from the end of
        # the table, so -1 selects row ``num_embeddings - 1``. A literal -1
        # in the data is NOT a valid index and must be remapped beforehand.
        self.embed_input = nn.Embedding(input_size, hidden_size, padding_idx=-1)
        self.embed_target = nn.Embedding(output_size, hidden_size, padding_idx=-1)
        self.lstm1 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear1 = nn.Linear(hidden_size, output_size)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

    @staticmethod
    def _check_indices(indices, num_embeddings, name):
        """Raise ``IndexError`` if any index falls outside the embedding table.

        On CUDA an out-of-range index otherwise surfaces later, at an
        unrelated line, as a "device-side assert triggered" error.
        """
        if indices.numel() == 0:
            return
        lowest = int(indices.min())
        highest = int(indices.max())
        if lowest < 0 or highest >= num_embeddings:
            raise IndexError(
                "{} contains token indices in [{}, {}], but the embedding "
                "table only accepts [0, {}]".format(
                    name, lowest, highest, num_embeddings - 1
                )
            )

    def forward(self, input_lines, target_lines):
        """Run the encoder LSTM cell over the source sequence.

        Args:
            input_lines: Iterable yielding one integer index tensor per time
                step (assumed to hold one index per batch element -- TODO
                confirm against the caller).
            target_lines: Currently unused; the decoder is not implemented.

        Returns:
            The placeholder value ``0``.

        Raises:
            IndexError: If a source index is outside ``[0, input_size)``.
        """
        # The state is created lazily from the first embedded step so that
        # its batch size, dtype and device always match the data, instead of
        # relying on a global ``batch_size`` and a hard-coded ``.cuda()``.
        hx = None
        cx = None
        for input_sentence_words in input_lines:
            self._check_indices(input_sentence_words, self.input_size, "input_lines")
            input_k = self.embed_input(input_sentence_words)
            print("input_k", input_k)
            if hx is None:
                hx = input_k.new_zeros(input_k.size(0), self.hidden_size)
                cx = input_k.new_zeros(input_k.size(0), self.hidden_size)
            hx, cx = self.lstm1(input_k, (hx, cx))
            print("hx", hx.size())
        return 0
Error message
input_k tensor([[ 1.1848, 0.1343, 0.4040, ..., -0.9256, -0.0975, 0.1987],
[ 1.2179, -1.4551, -0.1774, ..., 0.9071, 1.3571, 0.6015],
[ 1.2011, 1.2342, -0.8026, ..., -1.4794, -0.7845, 0.2520],
...,
[ 0.3803, 0.5644, 0.9808, ..., -2.3397, -0.3587, 1.6716],
[ 0.7521, 0.8567, 0.3936, ..., 0.1330, -0.0766, 2.1656],
[ 0.7806, -1.6959, -1.1628, ..., -0.8085, -0.7975, 1.2291]], device='cuda:0')
hx torch.Size([10, 128])
.................................
input_k tensor([[ 0.3803, 0.5644, 0.9808, ..., -2.3397, -0.3587, 1.6716],
[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[ 0.0170, 0.0557, -0.4217, ..., 1.0216, 2.1096, 0.2458],
...,
[-0.4336, -0.3118, 0.4406, ..., -1.4510, 0.8474, -1.0817],
[-0.5032, -0.9890, -0.4692, ..., 1.6164, 0.8813, 0.8150],
[-0.4336, -0.3118, 0.4406, ..., -1.4510, 0.8474, -1.0817]], device='cuda:0')
hx torch.Size([10, 128])
/pytorch/aten/src/THC/THCTensorIndex.cu:306: void indexSelectSmallIndex(TensorInfo<T, IndexType>, TensorInfo<T, IndexType>, TensorInfo<long, IndexType>, int, int, IndexType, long) [with T = float, IndexType = unsigned int, DstDim = 2, SrcDim = 2, IdxDim = -2]: block: [0,0,0], thread: [64,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/THC/THCTensorIndex.cu:306: void indexSelectSmallIndex(TensorInfo<T, IndexType>, TensorInfo<T, IndexType>, TensorInfo<long, IndexType>, int, int, IndexType, long) [with T = float, IndexType = unsigned int, DstDim = 2, SrcDim = 2, IdxDim = -2]: block: [0,0,0], thread: [65,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
.................................
/pytorch/aten/src/THC/THCTensorIndex.cu:306: void indexSelectSmallIndex(TensorInfo<T, IndexType>, TensorInfo<T, IndexType>, TensorInfo<long, IndexType>, int, int, IndexType, long) [with T = float, IndexType = unsigned int, DstDim = 2, SrcDim = 2, IdxDim = -2]: block: [0,0,0], thread: [62,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/THC/THCTensorIndex.cu:306: void indexSelectSmallIndex(TensorInfo<T, IndexType>, TensorInfo<T, IndexType>, TensorInfo<long, IndexType>, int, int, IndexType, long) [with T = float, IndexType = unsigned int, DstDim = 2, SrcDim = 2, IdxDim = -2]: block: [0,0,0], thread: [63,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
THCudaCheck FAIL file=/pytorch/aten/src/THC/generic/THCTensorCopy.c line=70 error=59 : device-side assert triggered
input_k Traceback (most recent call last):
File "pytorch.py", line 72, in <module>
loss = model(Transposed_input, Transposed_target)
File "/home/ochi/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "pytorch.py", line 42, in forward
print("input_k", input_k)
File "/home/ochi/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/tensor.py", line 57, in __repr__
return torch._tensor_str._str(self)
File "/home/ochi/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/_tensor_str.py", line 218, in _str
fmt, scale, sz = _number_format(self)
File "/home/ochi/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/_tensor_str.py", line 79, in _number_format
tensor = torch.DoubleTensor(tensor.size()).copy_(tensor).abs_().view(tensor.nelement())
RuntimeError: cuda runtime error (59) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:70
Thanks!