cfg = {
‘VGG11’: [64, ‘M’, 128, ‘M’, 256, 256, ‘M’, 512, 512, ‘M’, 512, 512, ‘M’],
‘VGG13’: [64, 64, ‘M’, 128, 128, ‘M’, 256, 256, ‘M’, 512, 512, ‘M’, 512, 512, ‘M’],
‘VGG16’: [64, 64, ‘M’, 128, 128, ‘M’, 256, 256, 256, ‘M’, 512, 512, 512, ‘M’, 512, 512, 512, ‘M’],
‘VGG19’: [64, 64, ‘M’, 128, 128, ‘M’, 256, 256, 256, 256, ‘M’, 512, 512, 512, 512, ‘M’, 512, 512, 512, 512, ‘M’],
}
class VGG(nn.Module):
def __init__(self, vgg_name):
super(VGG, self).__init__()
self.cfg = cfg[vgg_name]
self.teacher = self._make_layers()
self.pool = nn.AvgPool2d(kernel_size=1, stride=1)
self.linear = nn.Linear(512, 3) # change last layer to 3
def forward(self, x):
teacher_counter = 0
student_features = []
feature = x
for block in self.cfg:
if block == 'M':
feature = self.teacher[teacher_counter](feature)
student_features.append(feature)
teacher_counter += 1
out = self.pool(feature)
out = out.view(out.size(0), -1)
out = self.linear(feature)
print(len(student_features))
return out, student_features
def _make_layers(self):
layers = []
teacher = []
in_channels = 3
for x in self.cfg:
if x == 'M':
teacher.append(nn.Sequential(*layers).to("cuda"))
layers = []
else:
layers += [
nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
nn.BatchNorm2d(x),
nn.ReLU(inplace=True)
]
in_channels = x
return teacher
The goal is to return the final feature of VGG16 and get features for each ‘M’ in student_features. Could anyone please let me know if anything is wrong with the above implementation? The error message now says “CUDA out of memory” even though it works perfectly with the original VGG-16 under the same batch size