Hello,
I am facing a curious problem: as soon as I use a batch size larger than 2, I get a CUDA out-of-memory (OOM) error, even though nvidia-smi reports low memory usage at a batch size of 2.
The error I receive is as follows:
THCudaCheck FAIL file=/pytorch/torch/lib/THC/generic/THCTensorMath.cu line=35 error=2 : out of memory
Traceback (most recent call last):
File "Train1.py", line 189, in <module>
optimizer.step()
File "/usr/local/lib/python3.5/dist-packages/torch/optim/adam.py", line 56, in step
state['exp_avg'] = torch.zeros_like(p.data)
RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCTensorMath.cu:35
The model that I’m working with looks like this:
class Model(Module):
    """Convolutional feature extractor followed by a bidirectional GRU.

    Five 5x5 convolutions (the first three each followed by a 2x2 max-pool)
    turn a single-channel input into a 128-channel feature map. The map is
    then read as a sequence along its last dimension, passed through a
    3-layer bidirectional GRU, and projected to ``labels_length`` values per
    sequence step.
    """

    def __init__(self, labels_length):
        """Create the layers.

        Args:
            labels_length: Output size of the final linear layer, i.e. the
                number of values produced for every sequence step.
        """
        super(Model, self).__init__()
        self.conv1 = Conv2d(1, 8, 5)
        self.conv2 = Conv2d(8, 16, 5)
        self.conv3 = Conv2d(16, 32, 5)
        self.conv4 = Conv2d(32, 64, 5)
        self.conv5 = Conv2d(64, 128, 5)
        self.maxpool1 = MaxPool2d((2, 2))
        self.maxpool2 = MaxPool2d((2, 2))
        self.maxpool3 = MaxPool2d((2, 2))
        self.gru = GRU(128, 256, 3, batch_first=True, bidirectional=True)
        # 512 = 2 directions * 256 hidden units coming out of the GRU.
        self.fn1 = torch.nn.Linear(512, labels_length)

    def forward(self, x, batch_size):
        """Run the network on a batch.

        Args:
            x: 4-D input with a single channel (``conv1`` takes 1 input
                channel). Dimension 2 must have shrunk to exactly 1 after the
                conv/pool stack, otherwise ``squeeze(2)`` leaves it in place
                and the GRU receives a 4-D tensor; an input height of 100
                satisfies this.
            batch_size: Batch dimension used for the initial hidden state.
                It has to match ``x.size(0)``.

        Returns:
            The output of the final linear layer applied to every GRU step:
            ``(batch, steps, labels_length)``.
        """
        x = self.maxpool1(relu(self.conv1(x)))
        x = self.maxpool2(relu(self.conv2(x)))
        x = self.maxpool3(relu(self.conv3(x)))
        x = relu(self.conv4(x))
        x = relu(self.conv5(x))

        # Random initial hidden state. Its shape is taken from the GRU itself
        # rather than repeated as literals, and it follows the tensor type of
        # ``x`` instead of an unconditional ``.cuda()``, so the model works on
        # whatever the input lives on (including CPU).
        num_directions = 2 if self.gru.bidirectional else 1
        h0 = Variable(
            torch.randn(
                self.gru.num_layers * num_directions,
                batch_size,
                self.gru.hidden_size,
            )
        ).type_as(x)

        # (batch, channels, 1, width) -> (batch, width, channels): the last
        # dimension becomes the sequence axis expected with batch_first=True.
        x = x.squeeze(2)
        x = x.transpose(1, 2)

        self.gru.flatten_parameters()
        output, hn = self.gru(x, h0)
        output = self.fn1(output)
        return output
This is running on PyTorch 0.3 with CUDA 9 and cuDNN 7.
Here’s a screenshot of my nvidia-smi output:
Thanks