I am experimenting with Densenet implementation and trying to save intermediate output to a class variable . Here’s a snippet of code
class Bottleneck(nn.Module):
out_saved = [0] * 900
def init(self, nChannels, growthRate,layer_index, train_size, test_size, batch_sz):
super(Bottleneck, self).init()
interChannels = 4*growthRate
self.bn1 = nn.BatchNorm2d(nChannels)
self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1,
bias=False)
self.bn2 = nn.BatchNorm2d(interChannels)
self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3,
padding=1, bias=False)
# If the layer is still being trained
self.active=True
# The index of this layer relative to the overall net
self.layer_index=layer_index
self.counter = 0
self.train_size = train_size
self.test_size = test_size
self.batch_sz = batch_sz
remainder = train_size % batch_sz
self.maxCounter = train_size//batch_sz
if(remainder != 0):
self.maxCounter += 1
out_saved = [0] * (self.maxCounter)
def forward(self, x):
if not self.active:
self.eval()
# If we're not active, return a detached output to prevent backprop.
if self.active:
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat((x, out), 1)
if(self.layer_index < DenseNet.freezeLayerIndex):
Bottleneck.out_saved[self.counter] = out
self.counter+=1
if(self.counter >= self.maxCounter):
self.counter = 0
self.active = False
return out
else:
detach = Bottleneck.out_saved[self.counter].detach()
self.counter+=1
if(self.counter >= self.maxCounter):
self.counter = 0
detach.volatile= False
return detach
Based on my understanding of how python works, i assumed Bottleneck.out_saved[self.counter] is saved in main memory instead of GPU. But I am getting an memory out of error with CIFAR data set where i used 5000 images while training. I did not quite understand why saving intermediate output will make GPU out of memory.