GPU OOM with Simple VGG net

Ethan_Zhu · March 26, 2017, 9:24pm

Hi, I’m running into a GPU out-of-memory (OOM) problem.
Here’s the test script with a VGG net and dummy data, attached below.
I run it on a K40 GPU with 12 GB of memory, using the latest version of PyTorch. It throws an out-of-memory error after several iterations (fewer than 10).
The error occurs while computing the conv1 layer.

Can anyone help me figure out what the problem is?
Is it a problem with my implementation?

Any suggestions are appreciated.


import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np

def set_trainable(model, requires_grad):
    """Set the ``requires_grad`` flag on every parameter of ``model``.

    Args:
        model: Any object exposing ``parameters()`` (e.g. an ``nn.Module``).
        requires_grad: Value assigned to each parameter's ``requires_grad``
            attribute; ``False`` freezes the parameters, ``True`` makes them
            trainable again.
    """
    for weight in model.parameters():
        weight.requires_grad = requires_grad

class VGG16(nn.Module):
    """VGG16-style classifier with the first two conv stages frozen.

    The network is five convolutional stages (``conv1`` .. ``conv5``) followed
    by three fully connected layers producing 150 scores per sample.  The
    stages downsample by 2, 2, 2, 2 and 4 respectively (64x in total), and
    ``fc6_new`` expects ``512 * 7 * 7`` input features, so the model is sized
    for 448x448 inputs.

    Args:
        bn: Accepted for interface compatibility.
            NOTE(review): this flag is currently unused; no batch-norm layers
            are created regardless of its value.
    """

    def __init__(self, bn=False):
        super(VGG16, self).__init__()
        # Stages are built in the same order and with the same layer indices
        # as a hand-written nn.Sequential, so state_dict keys
        # (e.g. ``conv1.0.weight``, ``conv1.2.weight``) are unchanged.
        self.conv1 = self._conv_stage(3, 64, num_convs=2, pool=2)
        self.conv2 = self._conv_stage(64, 128, num_convs=2, pool=2)
        # The two earliest stages are frozen: their parameters receive no
        # gradients and must not be handed to the optimizer.
        set_trainable(self.conv1, requires_grad=False)
        set_trainable(self.conv2, requires_grad=False)
        self.conv3 = self._conv_stage(128, 256, num_convs=3, pool=2)
        self.conv4 = self._conv_stage(256, 512, num_convs=3, pool=2)
        # The last stage pools by 4 (not 2) so a 448x448 input ends at 7x7.
        self.conv5 = self._conv_stage(512, 512, num_convs=3, pool=4)
        self.fc6_new = nn.Linear(512 * 7 * 7, 4096)
        self.fc7_new = nn.Linear(4096, 4096)
        self.score_fc = nn.Linear(4096, 150)
        self.relu = nn.ReLU(inplace=True)

    @staticmethod
    def _conv_stage(in_channels, out_channels, num_convs, pool):
        """Build ``num_convs`` 3x3 conv + ReLU pairs followed by one max-pool.

        The ReLUs are in-place so each conv output buffer is reused for the
        activation instead of allocating a second tensor of the same size;
        the computed values are identical to the out-of-place version.
        """
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(nn.Conv2d(channels, out_channels,
                                    kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            channels = out_channels
        layers.append(nn.MaxPool2d(pool))
        return nn.Sequential(*layers)

    def forward(self, im_data):
        """Return class scores for a batch of images.

        Args:
            im_data: Image batch fed to ``conv1``; it must have 3 channels,
                and its spatial size must reduce to 7x7 after the conv stages
                (e.g. 448x448) for the flatten + ``fc6_new`` step to match.

        Returns:
            The output of ``score_fc``: 150 scores per sample.
        """
        x = self.conv1(im_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # Flatten everything except the batch dimension for the FC layers.
        x = x.view(x.size()[0], -1)
        x = self.fc6_new(x)
        x = self.relu(x)
        x = self.fc7_new(x)
        x = self.relu(x)
        x = self.score_fc(x)
        return x

def train(num_iters=1000, batch_size=32, lr=0.0001):
    """Train ``VGG16`` on random dummy data on the GPU and print the loss.

    Each iteration generates a fresh random batch of 448x448 RGB images and
    random labels in ``[0, 150)``, runs a forward/backward pass and applies
    one SGD step.  Requires a CUDA device (the model and data are moved with
    ``.cuda()``).

    Args:
        num_iters: Number of training iterations to run.
        batch_size: Number of images per batch.  Activation memory grows with
            this value, so lowering it is the first thing to try on an
            out-of-memory error.
        lr: Base learning rate for the conv stages; the fully connected
            layers use ten times this value.
    """
    net = VGG16()
    net = net.cuda()
    fc_lr = lr * 10.0
    # conv1/conv2 are frozen inside VGG16, so only the remaining layers are
    # registered with the optimizer.
    optimizer = torch.optim.SGD([
        {'params': net.conv3.parameters()},
        {'params': net.conv4.parameters()},
        {'params': net.conv5.parameters()},
        {'params': net.fc6_new.parameters(), 'lr': fc_lr},
        {'params': net.fc7_new.parameters(), 'lr': fc_lr},
        {'params': net.score_fc.parameters(), 'lr': fc_lr},
    ], lr=lr, momentum=0.9, weight_decay=0.005)
    # ``range`` instead of ``xrange`` so the script runs on Python 2 and 3.
    for i in range(num_iters):
        im_data = np.random.rand(batch_size, 3, 448, 448).astype(np.float32)
        label = np.floor(np.random.rand(batch_size) * 150).astype(np.int64)
        out = net(Variable(torch.from_numpy(im_data).cuda()))
        loss = F.cross_entropy(out, Variable(torch.from_numpy(label).cuda()), size_average=True)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('Iteration %d finished.  Loss: %.4f' % (i, loss.data.cpu().numpy()))


# Run the dummy-data training loop only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    train()

smth · March 27, 2017, 2:38am

If you can reproduce your issue in 25 to 40 lines of code, it will be much easier for others to investigate.

Ethan_Zhu · March 27, 2017, 3:06am

Thank you for your reply. I am not sure what you mean.
But I found that the formatting of the code was messed up. I’ve fixed it now.
The script can be directly copied and run now.

marsggbo · March 12, 2018, 3:43am

How did you revise your code?