Hi, I’m facing a GPU out-of-memory problem.
Here’s the test script with VGG net and dummy data, attached below.
I run it on a K40 GPU with 12 GB of memory, using the latest version of PyTorch. It throws an out-of-memory error after several iterations (fewer than 10).
The error occurs while computing the conv1 layer.
Can anyone help me figure out what the problem is?
Is it a problem with my implementation?
Any suggestions are appreciated.
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
def set_trainable(model, requires_grad):
    """Set ``requires_grad`` on every parameter of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        requires_grad: Value assigned to each parameter's ``requires_grad``
            attribute; pass ``False`` to freeze the parameters.
    """
    for param in model.parameters():
        param.requires_grad = requires_grad
def _conv_stage(in_channels, out_channels, num_convs, pool_size):
    """Build one stage: ``num_convs`` 3x3 conv + ReLU pairs, then max-pooling."""
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        layers.append(nn.Conv2d(channels, out_channels,
                                kernel_size=3, stride=1, padding=1))
        # In-place ReLU overwrites the convolution output instead of
        # allocating a second activation tensor of the same size; the
        # computed values are unchanged.
        layers.append(nn.ReLU(inplace=True))
        channels = out_channels
    layers.append(nn.MaxPool2d(pool_size))
    return nn.Sequential(*layers)


class VGG16(nn.Module):
    """VGG-16-style classifier whose first two convolutional stages are frozen.

    The network has five convolutional stages (``conv1`` .. ``conv5``), each
    ending in max-pooling, followed by three fully connected layers that
    produce 150 scores per sample.

    The pooling layers shrink the spatial size by 2 * 2 * 2 * 2 * 4 = 64 and
    ``fc6_new`` expects 512 * 7 * 7 input features, so the input must yield a
    7 x 7 feature map after ``conv5`` (e.g. 448 x 448 images).

    Args:
        bn: Accepted for interface compatibility; currently unused.
    """

    def __init__(self, bn=False):
        super(VGG16, self).__init__()
        self.conv1 = _conv_stage(3, 64, num_convs=2, pool_size=2)
        self.conv2 = _conv_stage(64, 128, num_convs=2, pool_size=2)
        # Freeze the first two stages so no gradients are computed for them.
        set_trainable(self.conv1, requires_grad=False)
        set_trainable(self.conv2, requires_grad=False)

        self.conv3 = _conv_stage(128, 256, num_convs=3, pool_size=2)
        self.conv4 = _conv_stage(256, 512, num_convs=3, pool_size=2)
        # The last stage pools by 4 rather than 2.
        self.conv5 = _conv_stage(512, 512, num_convs=3, pool_size=4)

        self.fc6_new = nn.Linear(512 * 7 * 7, 4096)
        self.fc7_new = nn.Linear(4096, 4096)
        self.score_fc = nn.Linear(4096, 150)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, im_data):
        """Return the class scores for a batch of images.

        Args:
            im_data: Input batch with 3 channels; the spatial size must give
                a 7 x 7 map after ``conv5`` (see the class docstring).

        Returns:
            Tensor of shape ``(batch, 150)`` with unnormalised scores.
        """
        x = self.conv1(im_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # Flatten everything except the batch dimension for the FC layers.
        x = x.view(x.size(0), -1)
        x = self.fc6_new(x)
        x = self.relu(x)
        x = self.fc7_new(x)
        x = self.relu(x)
        x = self.score_fc(x)
        return x
def train(num_iterations=1000, batch_size=32):
    """Train ``VGG16`` on random dummy data on the GPU, printing the loss.

    Each iteration draws a fresh random batch of 3 x 448 x 448 images and
    random labels in ``[0, 150)``, runs a forward/backward pass and applies
    one SGD step. Only ``conv3``..``conv5`` and the fully connected layers
    are handed to the optimizer; the fully connected layers use a learning
    rate ten times the base rate.

    Args:
        num_iterations: Number of optimisation steps to run.
        batch_size: Number of samples per step. Activation memory grows
            linearly with this value, so lower it if the GPU runs out of
            memory.
    """
    net = VGG16()
    net = net.cuda()

    lr = 0.0001
    fc_lr = lr * 10.0
    optimizer = torch.optim.SGD([
        {'params': net.conv3.parameters()},
        {'params': net.conv4.parameters()},
        {'params': net.conv5.parameters()},
        {'params': net.fc6_new.parameters(), 'lr': fc_lr},
        {'params': net.fc7_new.parameters(), 'lr': fc_lr},
        {'params': net.score_fc.parameters(), 'lr': fc_lr},
    ], lr=lr, momentum=0.9, weight_decay=0.005)

    # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
    for i in range(num_iterations):
        im_data = np.random.rand(batch_size, 3, 448, 448).astype(np.float32)
        label = np.floor(np.random.rand(batch_size) * 150).astype(np.int64)

        out = net(Variable(torch.from_numpy(im_data).cuda()))
        # Mean reduction is the default, so the deprecated ``size_average``
        # argument is not passed.
        loss = F.cross_entropy(out, Variable(torch.from_numpy(label).cuda()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('Iteration %d finished. Loss: %.4f' % (i, loss.data.cpu().numpy()))
if __name__ == '__main__':
    # Run the dummy-data training loop only when executed as a script.
    train()