Help to optimise memory usage! cuda out of memory

I am trying to train a model on Google Colab, but training fails with the error “CUDA error: out of memory”.

My Model

# loading pre-trained vgg16 model
import torch
import torch.nn as nn
import torchvision.models as models



class MLNet(nn.Module):
    """Multi-level saliency network on top of a truncated VGG-16 backbone.

    The outputs of three backbone layers (indices 16, 23 and 29 of
    ``self.features``) are concatenated along the channel axis, passed
    through 2-D dropout and two convolutions (1280 -> 64 -> 1 channels),
    multiplied by an upsampled prior map and rectified.

    Args:
        freeze_features: When ``True``, gradients are disabled for every
            backbone parameter. Autograd then does not need to keep the
            backbone activations for the backward pass, which greatly
            reduces GPU memory use. Defaults to ``False`` (backbone is
            fine-tuned, as before).

    Note:
        ``prior`` is registered as a buffer, so it follows ``.cuda()`` /
        ``.to(device)`` and is included in ``state_dict()``. It is a constant
        map of ones; NOTE(review): if the prior is meant to be learned, wrap
        it in ``nn.Parameter`` instead -- confirm against the paper.
    """

    # Indices of the backbone layers whose outputs are fused.
    _TAP_LAYERS = (16, 23, 29)

    def __init__(self, freeze_features=False):
        super(MLNet, self).__init__()

        # Drop the last max-pooling layer of the pretrained VGG-16.
        features = list(models.vgg16(pretrained=True).features)[:-1]

        # Reconfigure layer 23 (stride 1, 5x5 window, padding 2) so that it
        # keeps the spatial size and all tapped outputs can be concatenated.
        same_size_pool = features[23]
        same_size_pool.stride = 1
        same_size_pool.kernel_size = 5
        same_size_pool.padding = 2

        # eval() only switches the layers' train/eval flag; it does NOT stop
        # gradient tracking. Use ``freeze_features`` for that.
        self.features = nn.ModuleList(features).eval()
        if freeze_features:
            for param in self.features.parameters():
                param.requires_grad_(False)

        self.fddropout = nn.Dropout2d(p=0.5)
        self.int_conv = nn.Conv2d(
            1280, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        )
        self.pre_final_conv = nn.Conv2d(
            64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)
        )

        # A buffer instead of a hard-coded ``.cuda()`` tensor: the module can
        # be built on CPU-only machines and the prior moves with the model.
        self.register_buffer("prior", torch.ones(1, 1, 6, 8))

    def forward(self, x):
        """Compute the saliency map for a batch of images.

        Args:
            x: Input batch fed to the first backbone layer.

        Returns:
            A non-negative tensor with one channel and the spatial size of
            the fused backbone feature maps.
        """
        taps = []
        for idx, layer in enumerate(self.features):
            x = layer(x)
            if idx in self._TAP_LAYERS:
                taps.append(x)

        fused = self.fddropout(torch.cat(taps, 1))
        out = self.pre_final_conv(self.int_conv(fused))

        # Resize the prior to whatever size the features have rather than by
        # a fixed factor of 10. ``align_corners=True`` reproduces the
        # behaviour of the deprecated ``upsample_bilinear``.
        upscaled_prior = nn.functional.interpolate(
            self.prior,
            size=tuple(out.shape[-2:]),
            mode="bilinear",
            align_corners=True,
        )

        return nn.functional.relu(out * upscaled_prior)

  
model = MLNet().cuda()

# Smoke-test the forward pass once. Run it under no_grad() so that `out` does
# not keep the autograd graph (and all intermediate activations of a batch of
# five 480x640 images) alive on the GPU while training starts. Calling the
# module itself, rather than .forward(), also runs any registered hooks.
with torch.no_grad():
    out = model(torch.randn(5, 3, 480, 640).cuda())

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

The training code looks like this:

# One optimisation step per batch produced by generator(2) (batch size: 2).
for i, j in generator(2):
  optimizer.zero_grad()

  # Turn each half of the batch into a tensor and move it to the GPU.
  i = torch.tensor(i, dtype=torch.float).cuda()
  j = torch.tensor(j).cuda()

  # Forward pass, loss, backward pass, parameter update.
  out = model(i)
  loss_contrastive = criterion(out, j)
  loss_contrastive.backward()
  optimizer.step()

  # .item() yields a plain Python number for logging.
  print(loss_contrastive.item())

Error:

<ipython-input-9-73a253f89711> in <module>()
      3   i,j = torch.tensor(i,dtype=torch.float),torch.tensor(j)
      4   i,j = i.cuda(),j.cuda()
----> 5   out = model(i)
      6   loss_contrastive = criterion(out,j)
      7   loss_contrastive.backward()

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    475             result = self._slow_forward(*input, **kwargs)
    476         else:
--> 477             result = self.forward(*input, **kwargs)
    478         for hook in self._forward_hooks.values():
    479             hook_result = hook(self, input, result)

<ipython-input-4-1d33208959c1> in forward(self, x)
     34         results = []
     35         for ii,model in enumerate(self.features):
---> 36             x = model(x)
     37             if ii in {16,23,29}:
     38                 results.append(x)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    475             result = self._slow_forward(*input, **kwargs)
    476         else:
--> 477             result = self.forward(*input, **kwargs)
    478         for hook in self._forward_hooks.values():
    479             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/conv.py in forward(self, input)
    299     def forward(self, input):
    300         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301                         self.padding, self.dilation, self.groups)
    302 
    303 

RuntimeError: CUDA error: out of memory

The model I am trying to implement is: Marcella Cornia, Lorenzo Baraldi, Giuseppe Serra, Rita Cucchiara. “A Deep Multi-Level Network for Saliency Prediction.” In Proceedings of the 23rd International Conference on Pattern Recognition, 2016.

Can anyone help me optimize the feature extraction in this model?

 for ii,model in enumerate(self.features):
            x = model(x)
            if ii in {16,23,29}:
                results.append(x)

The given image size (480x640) is fairly large. Can you try with a single image or a smaller image?

A single image does work, but the 12 GB of GPU RAM on Colab should be sufficient for this.

As @InnovArul mentioned, the image size is pretty large, so whether the batch fits depends on the GPU you are using and how much memory it has.
Instead of VGG16, you could try a more memory-efficient model such as ResNet-18 or even SqueezeNet.