OutOfMemory-Exception when testing on self trained VGG16 model

I get a OutOfMemory exception when testing on images. I use a selftrained modified VGG16-network. I train only on 1000 Pixels of a 224*224Pixel image.
When Testing I use the whole image, but due to short memory capacity of my GPU (8GB) in a first step I send the first part of the image through the net, and then the second part and then I combine the results for the whole image.
It works totally fine for the first image, but in the second image in runs into the OutOfMemory exception.

<ipython-input-5-4f4e71ecf156> in forward(self, x)
--> 176                         x = self.classifier(Variable(self.hypercolumns_tensor))
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
--> 357             result = self.forward(*input, **kwargs)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
---> 67             input = module(input)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
--> 357             result = self.forward(*input, **kwargs)
~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input)
---> 55         return F.linear(input, self.weight, self.bias)
~/miniconda3/lib/python3.6/site-packages/torch/nn/functional.py in linear(input, weight, bias)
--> 838     output = input.matmul(weight.t())
~/miniconda3/lib/python3.6/site-packages/torch/autograd/variable.py in matmul(self, other
--> 386         return torch.matmul(self, other)
~/miniconda3/lib/python3.6/site-packages/torch/functional.py in matmul(tensor1, tensor2, out)
--> 192             output = torch.mm(tensor1, tensor2)

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518243271935/work/torch/lib/THC/generic/THCStorage.cu:58

I found out it doesn’t do so, when I “fake” the second part of the image by just creating a tensor with the correct size and random numbers. Also when I get both part of the images while testing. When I combine them to get the result all works fine but the next image runs in a memory-Exception.

Does anyone have an idea why this is happening or what I could check to find the cause for that problem?

Here’s my code (the testing part of my network and my testing func):

class Net(nn.Module):

def __init__(self, vgg):
	super(Net, self).__init__()
	print('start init Net')
	#print('vgg network:', vgg)
	self.features = vgg.features
    
	self.first_part_image = True
	print('list(vgg.classifier.children()):', list(vgg.classifier.children()))
	fc_layers = nn.Sequential(
		*list(vgg.classifier.children())[:-1]
	)
	# converting fc layers to conv layers
	self.fc = fc_layers[0].state_dict()
	in_ch = 512
	out_ch = self.fc["weight"].size(0)
	print('in_ch:', in_ch, 'out_ch:', out_ch)
	
	firstConv = nn.Conv2d(in_ch, out_ch, 7)
	
	# get the weights from the fc layers
	firstConv.load_state_dict({"weight": self.fc["weight"].view(out_ch, in_ch, 7, 7),
                               "bias": self.fc["bias"]})
	
	# create a list of convs
	convList = [firstConv]
	
	for layer in range(6):
		if layer == 3:
			# convert the nn.Linear to nn.Conv
			self.fc = fc_layers[layer].state_dict()
			in_ch = self.fc["weight"].size(1)
			out_ch = self.fc["weight"].size(0)
			conv = nn.Conv2d(in_ch, out_ch, 1)
			
			conv.load_state_dict({"weight": self.fc["weight"].view(out_ch, in_ch, 1, 1),
                                  "bias": self.fc["bias"]})
			
			convList += [conv]
		elif layer == 1 or layer == 4:
			convList += [nn.ReLU()]
		elif layer == 2 or layer == 5:
			convList += [nn.Dropout()]
	
	# set the conv layers as a nn.Sequential module
	self.conv6_7 = nn.Sequential(*convList)
    
	# creating classifier
	self.classifier = nn.Sequential(
		nn.Linear(5568, 4096),
		nn.ReLU(inplace=True),
		nn.Dropout(inplace=True),
		nn.Linear(4096, 4096),
		nn.ReLU(inplace=True),
		nn.Dropout(inplace=True),
		nn.Linear(4096, 3)
	)
    
	# setting weights and bias for classifier
	self.classifier[0].weight.data.normal_(0, 0.0005)
	self.classifier[0].bias.data.fill_(0.1)
	self.classifier[3].weight.data.normal_(0, 0.0005)
	self.classifier[3].bias.data.fill_(0.1)
	self.classifier[6].weight.data.normal_(0, 0.0005)
	self.classifier[6].bias.data.fill_(0.1)
    
	print('self.features:', self.features)
	print('self.conv6_7:', self.conv6_7)
	print('self.classifier:', self.classifier)

    
	self.PIC_HEIGHT = 224 # before 240
	self.PIC_WIDTH = 224 # before 320
	
	self.output_layer1_2 = torch.FloatTensor().cuda()
	self.output_layer2_2 = torch.FloatTensor().cuda()
	self.output_layer3_3 = torch.FloatTensor().cuda()
	self.output_layer4_3 = torch.FloatTensor().cuda()
	self.output_layer5_3 = torch.FloatTensor().cuda()
	self.output_conv7 = torch.FloatTensor().cuda()
    
	self.features[3].register_forward_hook(self.get_layer1_2)
	self.features[8].register_forward_hook(self.get_layer2_2)
	self.features[15].register_forward_hook(self.get_layer3_3)
	self.features[22].register_forward_hook(self.get_layer4_3)
	self.features[29].register_forward_hook(self.get_layer5_3)
	self.conv6_7[5].register_forward_hook(self.get_conv7)
    
    
	self.hypercolumns_tensor = torch.FloatTensor().cuda()
	self.target_tensor = torch.FloatTensor().cuda()
	self.input_bak = torch.FloatTensor().cuda()
	self.first_half_pic = torch.FloatTensor().cuda()
	self.second_half_pic = torch.FloatTensor().cuda()
	self.whole_pic = torch.FloatTensor().cuda()

def forward(self, x):
	if self.training == False:
		# saving input for second part of image
		self.input_bak = x
		########## FIRST HALF OF THE IMAGE ###########
		self.first_part_image = True
		x = self.features(x)
		x = self.conv6_7(x)
		# combining the hypercolumns from different layers to one hypercolumn per pixel
		self.hypercolumns_tensor = torch.cat(
            (self.output_layer1_2, self.output_layer2_2, 
             self.output_layer3_3, self.output_layer4_3, 
             self.output_layer5_3, self.output_conv7
            ), 1)
		# permute tensor for classifier
		self.hypercolumns_tensor = self.hypercolumns_tensor.permute(0,2,1)
		x = self.classifier(Variable(self.hypercolumns_tensor))
		x = F.normalize(x, p = 2, dim = 2)
		x = x.permute(0, 2, 1)
		self.first_half_pic = x
		#first_half_pic = Variable(torch.randn(torch.Size((1, 3, 25088)), out=torch.cuda.FloatTensor(torch.Size((1, 3, 25088)))))
		########## SECOND HALF OF THE IMAGE ###########
		x = self.input_bak
		self.first_part_image = False
		x = self.features(x)
		x = self.conv6_7(x)
		# combining the hypercolumns from different layers to one hypercolumn per pixel
		self.hypercolumns_tensor = torch.cat(
            (self.output_layer1_2, self.output_layer2_2, 
             self.output_layer3_3, self.output_layer4_3, 
             self.output_layer5_3, self.output_conv7
            ), 1)
		# permute tensor for classifier
		self.hypercolumns_tensor = self.hypercolumns_tensor.permute(0,2,1)
		x = self.classifier(Variable(self.hypercolumns_tensor))
		x = F.normalize(x, p = 2, dim = 2)
		x = x.permute(0, 2, 1)
		self.second_half_pic = x
		#second_half_pic = Variable(torch.randn(torch.Size((1, 3, 25088)), out=torch.cuda.FloatTensor(torch.Size((1, 3, 25088)))))
		# COMBINING THE PIC
		x = torch.cat((self.first_half_pic, self.second_half_pic), 2)
		x = x.view(1, 3, self.PIC_HEIGHT, self.PIC_WIDTH)
		return x

	# for training on all pixels
	if TRAIN_ON_1000_PIXELS == False:
		x = x.view(1, 3, self.PIC_HEIGHT, self.PIC_WIDTH)
	return x

def get_layer1_2(self, layer, input, output):
	if self.training == False or (self.training == True and TRAIN_ON_1000_PIXELS == False):
		if self.first_part_image:
			self.output_layer1_2 = output.data.view(1, 64, int(self.PIC_HEIGHT * self.PIC_WIDTH)).narrow(2, 0, int(self.PIC_HEIGHT * self.PIC_WIDTH / 2))
		else:
			self.output_layer1_2 = output.data.view(1, 64, int(self.PIC_HEIGHT * self.PIC_WIDTH)).narrow(2, int(self.PIC_HEIGHT * self.PIC_WIDTH / 2), int(self.PIC_HEIGHT * self.PIC_WIDTH / 2))
	else:
		self.output_layer1_2 = output.data

 def get_layer2_2(self, layer, input, output), def get_layer3_3(self, layer, input, output), etc.
   (this functions are similar to the first one)

Und hier meine Testfunktion:

def testing(image):
torch.cuda.empty_cache()
  	for iteration, batch in enumerate(test_data_loader, 0):
    
	input, target = Variable(batch[0]), Variable(batch[1])
	target = F.normalize(target, p = 2, dim = 1)
	if cuda:
		input = input.cuda()
		target = target.cuda()

	prediction = model(input)
    
	loss = criterion(prediction, target)

Problem is not very well explained, as far as i understood you are validating a dataset and get out of memory. Keep in mind that while training, when you use model.backwards() memory get clean. If you are validating you are not cleaning the memory. Could it be?

Try out test loop with: with torch.no_grad()

Thanks for the hint. That could it be. But how do I clean the memory? I tried calling torch.cuda.empty_cache() before each image, but it didn’t help.

I tried it but got the following error:

  AttributeError: module 'torch' has no attribute 'no_grad'

I googled that error and found the hint to set volatile=True to all the Variable I’m using, but this also didn’t solve my problem.

Are you using model.eval()?

Yes, I do. For testing I use model.eval(), for training I use model.train().

Torch.no_grad() works for me and it is also used in tutorials.

I found out I had an older version of torch (3.2.something). I uninstalled torch and installed the newest version 4.1.0.

Now it works!

Thanks for all the help!