CUDA Out of Memory Error: Understanding how PyTorch stores class variables

I am experimenting with a DenseNet implementation and trying to save intermediate outputs to a class variable. Here's a snippet of the code:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class Bottleneck(nn.Module):
        # Class-level cache shared by every Bottleneck instance
        out_saved = [0] * 900

        def __init__(self, nChannels, growthRate, layer_index, train_size, test_size, batch_sz):
            super(Bottleneck, self).__init__()
            interChannels = 4 * growthRate
            self.bn1 = nn.BatchNorm2d(nChannels)
            self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1,
                                   bias=False)
            self.bn2 = nn.BatchNorm2d(interChannels)
            self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3,
                                   padding=1, bias=False)

            # If the layer is still being trained
            self.active = True

            # The index of this layer relative to the overall net
            self.layer_index = layer_index

            self.counter = 0
            self.train_size = train_size
            self.test_size = test_size
            self.batch_sz = batch_sz

            # Number of batches per epoch (round up for a partial final batch)
            remainder = train_size % batch_sz
            self.maxCounter = train_size // batch_sz
            if remainder != 0:
                self.maxCounter += 1
            Bottleneck.out_saved = [0] * self.maxCounter

        def forward(self, x):
            if not self.active:
                self.eval()

            # If we're not active, return a detached output to prevent backprop.
            if self.active:
                out = self.conv1(F.relu(self.bn1(x)))
                out = self.conv2(F.relu(self.bn2(out)))
                out = torch.cat((x, out), 1)
                if self.layer_index < DenseNet.freezeLayerIndex:
                    # Cache this batch's output in the class-level list
                    Bottleneck.out_saved[self.counter] = out
                    self.counter += 1
                    if self.counter >= self.maxCounter:
                        self.counter = 0
                        self.active = False
                return out
            else:
                # Reuse the cached output from the training pass
                detach = Bottleneck.out_saved[self.counter].detach()
                self.counter += 1
                if self.counter >= self.maxCounter:
                    self.counter = 0
                detach.volatile = False
                return detach

Based on my understanding of how Python works, I assumed `Bottleneck.out_saved[self.counter]` would be stored in main memory rather than on the GPU. But I am getting a CUDA out-of-memory error on the CIFAR dataset, where I used 5,000 images for training. I don't quite understand why saving the intermediate outputs makes the GPU run out of memory.
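
To sanity-check my assumption, I wrote the small standalone sketch below (the `Cache` class, the tensor shape, and the loop count are arbitrary placeholders, not values from my DenseNet run, and I tried it on a recent PyTorch version). It stores CUDA tensors in a class-level list, the same way `Bottleneck.out_saved` does, and prints `torch.cuda.memory_allocated()` along the way:

    import torch

    class Cache:
        # Class-level list, analogous to Bottleneck.out_saved
        saved = []

    if torch.cuda.is_available():
        print(torch.cuda.memory_allocated())     # GPU bytes held by live tensors
        for _ in range(10):
            # Stand-in for a layer output produced on the GPU
            Cache.saved.append(torch.randn(64, 128, 32, 32, device="cuda"))
        print(Cache.saved[0].device)              # cuda:0 -- the Python list only holds references,
                                                  # it does not move anything to host memory
        print(torch.cuda.memory_allocated())      # has grown by the size of all cached tensors

        # Copying the cached outputs to the CPU frees the GPU storage
        # once no reference to the original CUDA tensors remains
        Cache.saved = [t.detach().cpu() for t in Cache.saved]
        print(torch.cuda.memory_allocated())      # back to the starting value

If that is what is happening in my code, does it mean I should store `out.detach().cpu()` in `out_saved` (and move it back to the GPU when it is reused), or is there a better way to keep these intermediate outputs out of GPU memory?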