I get an OutOfMemory exception when testing on images. I use a self-trained, modified VGG16 network. I train on only 1000 pixels of a 224×224-pixel image.
When testing I use the whole image, but because my GPU has only 8 GB of memory, I first send one half of the image through the net, then the other half, and then combine the results for the whole image.
This works fine for the first image, but on the second image it runs into the OutOfMemory exception.
<ipython-input-5-4f4e71ecf156> in forward(self, x)
--> 176 x = self.classifier(Variable(self.hypercolumns_tensor))
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
--> 357 result = self.forward(*input, **kwargs)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
---> 67 input = module(input)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
--> 357 result = self.forward(*input, **kwargs)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input)
---> 55 return F.linear(input, self.weight, self.bias)
~/miniconda3/lib/python3.6/site-packages/torch/nn/functional.py in linear(input, weight, bias)
--> 838 output = input.matmul(weight.t())
~/miniconda3/lib/python3.6/site-packages/torch/autograd/variable.py in matmul(self, other)
--> 386 return torch.matmul(self, other)
~/miniconda3/lib/python3.6/site-packages/torch/functional.py in matmul(tensor1, tensor2, out)
--> 192 output = torch.mm(tensor1, tensor2)
RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518243271935/work/torch/lib/THC/generic/THCStorage.cu:58
I found out that it does not crash when I "fake" the second part of the image by just creating a tensor of the correct size filled with random numbers. When I compute both parts of the image for real during testing, combining them into the full result works fine, but the next image then runs into the memory exception.
Does anyone have an idea why this is happening, or what I could check to find the cause of the problem?
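For reference, one thing that might be worth checking, assuming the PyTorch 0.3-era API that the traceback paths point to: if the test inputs are plain Variables, autograd records a computation graph through the whole VGG for every half-image pass, and that graph stays alive as long as anything references its outputs. A minimal sketch of that check, reusing the names from the testing function below:

    # Sketch (PyTorch 0.3 API): volatile=True tells autograd not to record
    # a graph at all during the test-time forward passes, so intermediate
    # buffers are freed immediately instead of piling up across images.
    input = Variable(batch[0], volatile=True)
    target = Variable(batch[1], volatile=True)
    prediction = model(input)  # forward only, no graph retained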
Here's my code (the testing part of my network and my testing function):
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class Net(nn.Module):
    def __init__(self, vgg):
        super(Net, self).__init__()
        print('start init Net')
        #print('vgg network:', vgg)
        self.features = vgg.features
        self.first_part_image = True
        print('list(vgg.classifier.children()):', list(vgg.classifier.children()))
        fc_layers = nn.Sequential(
            *list(vgg.classifier.children())[:-1]
        )
        # converting fc layers to conv layers
        self.fc = fc_layers[0].state_dict()
        in_ch = 512
        out_ch = self.fc["weight"].size(0)
        print('in_ch:', in_ch, 'out_ch:', out_ch)
        firstConv = nn.Conv2d(in_ch, out_ch, 7)
        # get the weights from the fc layers
        firstConv.load_state_dict({"weight": self.fc["weight"].view(out_ch, in_ch, 7, 7),
                                   "bias": self.fc["bias"]})
        # create a list of convs
        convList = [firstConv]
        for layer in range(6):
            if layer == 3:
                # convert the nn.Linear to nn.Conv
                self.fc = fc_layers[layer].state_dict()
                in_ch = self.fc["weight"].size(1)
                out_ch = self.fc["weight"].size(0)
                conv = nn.Conv2d(in_ch, out_ch, 1)
                conv.load_state_dict({"weight": self.fc["weight"].view(out_ch, in_ch, 1, 1),
                                      "bias": self.fc["bias"]})
                convList += [conv]
            elif layer == 1 or layer == 4:
                convList += [nn.ReLU()]
            elif layer == 2 or layer == 5:
                convList += [nn.Dropout()]
        # set the conv layers as a nn.Sequential module
        self.conv6_7 = nn.Sequential(*convList)
        # creating classifier
        self.classifier = nn.Sequential(
            nn.Linear(5568, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(inplace=True),
            nn.Linear(4096, 3)
        )
        # setting weights and bias for classifier
        self.classifier[0].weight.data.normal_(0, 0.0005)
        self.classifier[0].bias.data.fill_(0.1)
        self.classifier[3].weight.data.normal_(0, 0.0005)
        self.classifier[3].bias.data.fill_(0.1)
        self.classifier[6].weight.data.normal_(0, 0.0005)
        self.classifier[6].bias.data.fill_(0.1)
        print('self.features:', self.features)
        print('self.conv6_7:', self.conv6_7)
        print('self.classifier:', self.classifier)
        self.PIC_HEIGHT = 224  # before 240
        self.PIC_WIDTH = 224   # before 320
        # buffers that the forward hooks write the intermediate feature maps into
        self.output_layer1_2 = torch.FloatTensor().cuda()
        self.output_layer2_2 = torch.FloatTensor().cuda()
        self.output_layer3_3 = torch.FloatTensor().cuda()
        self.output_layer4_3 = torch.FloatTensor().cuda()
        self.output_layer5_3 = torch.FloatTensor().cuda()
        self.output_conv7 = torch.FloatTensor().cuda()
        self.features[3].register_forward_hook(self.get_layer1_2)
        self.features[8].register_forward_hook(self.get_layer2_2)
        self.features[15].register_forward_hook(self.get_layer3_3)
        self.features[22].register_forward_hook(self.get_layer4_3)
        self.features[29].register_forward_hook(self.get_layer5_3)
        self.conv6_7[5].register_forward_hook(self.get_conv7)
        self.hypercolumns_tensor = torch.FloatTensor().cuda()
        self.target_tensor = torch.FloatTensor().cuda()
        self.input_bak = torch.FloatTensor().cuda()
        self.first_half_pic = torch.FloatTensor().cuda()
        self.second_half_pic = torch.FloatTensor().cuda()
        self.whole_pic = torch.FloatTensor().cuda()
    def forward(self, x):
        if self.training == False:
            # saving input for second part of image
            self.input_bak = x
            ########## FIRST HALF OF THE IMAGE ##########
            self.first_part_image = True
            x = self.features(x)
            x = self.conv6_7(x)
            # combining the hypercolumns from different layers to one hypercolumn per pixel
            self.hypercolumns_tensor = torch.cat(
                (self.output_layer1_2, self.output_layer2_2,
                 self.output_layer3_3, self.output_layer4_3,
                 self.output_layer5_3, self.output_conv7
                 ), 1)
            # permute tensor for classifier
            self.hypercolumns_tensor = self.hypercolumns_tensor.permute(0, 2, 1)
            x = self.classifier(Variable(self.hypercolumns_tensor))
            x = F.normalize(x, p=2, dim=2)
            x = x.permute(0, 2, 1)
            self.first_half_pic = x
            #first_half_pic = Variable(torch.randn(torch.Size((1, 3, 25088)), out=torch.cuda.FloatTensor(torch.Size((1, 3, 25088)))))
            ########## SECOND HALF OF THE IMAGE ##########
            x = self.input_bak
            self.first_part_image = False
            x = self.features(x)
            x = self.conv6_7(x)
            # combining the hypercolumns from different layers to one hypercolumn per pixel
            self.hypercolumns_tensor = torch.cat(
                (self.output_layer1_2, self.output_layer2_2,
                 self.output_layer3_3, self.output_layer4_3,
                 self.output_layer5_3, self.output_conv7
                 ), 1)
            # permute tensor for classifier
            self.hypercolumns_tensor = self.hypercolumns_tensor.permute(0, 2, 1)
            x = self.classifier(Variable(self.hypercolumns_tensor))
            x = F.normalize(x, p=2, dim=2)
            x = x.permute(0, 2, 1)
            self.second_half_pic = x
            #second_half_pic = Variable(torch.randn(torch.Size((1, 3, 25088)), out=torch.cuda.FloatTensor(torch.Size((1, 3, 25088)))))
            # COMBINING THE PIC
            x = torch.cat((self.first_half_pic, self.second_half_pic), 2)
            x = x.view(1, 3, self.PIC_HEIGHT, self.PIC_WIDTH)
            return x
        # for training on all pixels
        if TRAIN_ON_1000_PIXELS == False:
            x = x.view(1, 3, self.PIC_HEIGHT, self.PIC_WIDTH)
            return x
    def get_layer1_2(self, layer, input, output):
        if self.training == False or (self.training == True and TRAIN_ON_1000_PIXELS == False):
            num_pixels = int(self.PIC_HEIGHT * self.PIC_WIDTH)
            half = int(num_pixels / 2)
            if self.first_part_image:
                self.output_layer1_2 = output.data.view(1, 64, num_pixels).narrow(2, 0, half)
            else:
                self.output_layer1_2 = output.data.view(1, 64, num_pixels).narrow(2, half, half)
        else:
            self.output_layer1_2 = output.data
The hooks get_layer2_2, get_layer3_3, and so on follow the same pattern as get_layer1_2, so they are omitted here.
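For reference, a sketch of what one of these omitted hooks presumably looks like. The input size of the classifier's first Linear (5568 = 64 + 128 + 256 + 512 + 512 + 4096) suggests every hook yields a full-resolution hypercolumn slice, so the deeper feature maps must be brought back to 224×224 before flattening; the F.upsample step here is my assumption, not code from the original post:

    def get_layer2_2(self, layer, input, output):
        # Hypothetical reconstruction: conv2_2's 128-channel map is 112x112,
        # so it is (presumably) upsampled to 224x224 before being flattened
        # and narrowed to the current half of the image.
        if self.training == False or (self.training == True and TRAIN_ON_1000_PIXELS == False):
            up = F.upsample(output, size=(self.PIC_HEIGHT, self.PIC_WIDTH), mode='bilinear')
            num_pixels = int(self.PIC_HEIGHT * self.PIC_WIDTH)
            half = int(num_pixels / 2)
            if self.first_part_image:
                self.output_layer2_2 = up.data.view(1, 128, num_pixels).narrow(2, 0, half)
            else:
                self.output_layer2_2 = up.data.view(1, 128, num_pixels).narrow(2, half, half)
        else:
            self.output_layer2_2 = output.data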
And here is my testing function:
def testing(image):
    torch.cuda.empty_cache()
    for iteration, batch in enumerate(test_data_loader, 0):
        input, target = Variable(batch[0]), Variable(batch[1])
        target = F.normalize(target, p=2, dim=1)
        if cuda:
            input = input.cuda()
            target = target.cuda()
        prediction = model(input)
        loss = criterion(prediction, target)
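One more hedged note on the loop above: torch.cuda.empty_cache() only returns unused cached blocks to the driver; it cannot free tensors that are still referenced. Since prediction and loss from the previous image are still alive when the next iteration calls model(input), dropping them explicitly at the end of each iteration is a cheap thing to try:

    # Sketch: release the previous image's outputs (and any autograd graph
    # they hold) before the next forward pass allocates its own buffers.
    del prediction, loss
    torch.cuda.empty_cache()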