Say I have computed the conv5 feature (say 512x14x14) of VGG16 in an m-class classification problem, and I want to fuse this feature with m different weighting maps, one per class, which gives m different 512x14x14 features. This is followed by another convolution module that takes a 512x14x14 feature as input and outputs a 1x14x14 map; I want to feed the m 512x14x14 features separately through this single convolution module to get m 1x14x14 maps. Then I feed the m maps to a pooling layer to get m scores for the m classes for a softmax loss. I tried to do this with a for loop in the forward function, but it seems:
- the gradient cannot flow backward
- this error appears: ‘RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed.’
How should I do this correctly in PyTorch? Please leave your advice or any hints.
About the second point: I’ve checked this tutorial about weight sharing. It seems the author uses the same module multiple times sequentially, while I use the same convolution module in parallel and get the error; I can’t figure out why.
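For reference, here is a minimal standalone sketch (made-up shapes, current tensor API) suggesting that parallel reuse of a single module inside one forward pass is fine on its own, since gradients from both branches simply accumulate into the shared weights:

import torch
import torch.nn as nn

shared = nn.Conv2d(512, 1, 3, padding=1)      # one module, applied twice
a = torch.randn(2, 512, 14, 14, requires_grad=True)
b = torch.randn(2, 512, 14, 14, requires_grad=True)
out = shared(a) + shared(b)                   # parallel branches, shared weights
out.sum().backward()                          # a single backward pass: no error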
Here is my code (it contains more details, which I’ll explain in the comments):
import torch
import torch.nn as nn
import torch.nn.functional as F

# ConvReLU is a small helper (Conv2d + optional BatchNorm + ReLU);
# its definition is omitted here
class WSDR(nn.Module):
    def __init__(self, num_classes=20, bn=False, gpuID=0):
        super(WSDR, self).__init__()
        self.conv1 = nn.Sequential(ConvReLU(3, 64, 3, pd=True, bn=bn),
                                   ConvReLU(64, 64, 3, pd=True, bn=bn),
                                   nn.MaxPool2d(2))
        self.conv2 = nn.Sequential(ConvReLU(64, 128, 3, pd=True, bn=bn),
                                   ConvReLU(128, 128, 3, pd=True, bn=bn),
                                   nn.MaxPool2d(2))
        self.conv3 = nn.Sequential(ConvReLU(128, 256, 3, pd=True, bn=bn),
                                   ConvReLU(256, 256, 3, pd=True, bn=bn),
                                   ConvReLU(256, 256, 3, pd=True, bn=bn),
                                   nn.MaxPool2d(2))
        self.conv4 = nn.Sequential(ConvReLU(256, 512, 3, pd=True, bn=bn),
                                   ConvReLU(512, 512, 3, pd=True, bn=bn),
                                   ConvReLU(512, 512, 3, pd=True, bn=bn),
                                   nn.MaxPool2d(2))
        self.conv5 = nn.Sequential(ConvReLU(512, 512, 3, pd=True, bn=bn),
                                   ConvReLU(512, 512, 3, pd=True, bn=bn),
                                   ConvReLU(512, 512, 3, pd=True, bn=bn))
        # the shared convolution module applied to each of the m fused conv5 features
        self.feature_gap = nn.Sequential(ConvReLU(512, 256, 3, pd=True, bn=bn),
                                         ConvReLU(256, 128, 3, pd=True, bn=bn),
                                         ConvReLU(128, 64, 3, pd=True, bn=bn),
                                         ConvReLU(64, 1, 3, pd=True, bn=bn))
        # a module applied directly to the conv5 feature, producing one
        # 14x14 map shared by all m classes
        self.feature_box = nn.Sequential(ConvReLU(512, 256, 3, pd=True, bn=bn),
                                         ConvReLU(256, 128, 3, pd=True, bn=bn),
                                         ConvReLU(128, 64, 3, pd=True, bn=bn),
                                         ConvReLU(64, 1, 3, pd=True, bn=bn))
        self.gap = nn.AvgPool2d(kernel_size=14, stride=14)  # pooling layer for the class scores
        self.box = nn.Sigmoid()  # used for generating the weighting maps
        self.gpuID = gpuID
        self.num_classes = num_classes  # m; forward() below relies on this
    def forward(self, im_data, pre_maps):
        """
        im_data:  input image batch
        pre_maps: batchsize x m x 14 x 14, the weighting maps for fusing
                  the m per-class features
        """
        x = self.conv1(im_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        conv5 = self.conv5(x)
        batch_sz, ch, sp_sz, _ = conv5.size()
        box_forward = self.feature_box(conv5)  # one 14x14 map for all m classes
        pre_maps = torch.chunk(pre_maps, self.num_classes, 1)
        # collect per-class outputs in lists and concatenate once after the
        # loop, rather than re-concatenating a growing tensor every iteration
        cls_features, cls_scores = [], []
        for cls_idx in range(self.num_classes):  # m classes
            pre_map = pre_maps[cls_idx]
            # compute the weighting map for the current class
            box_feature = torch.add(pre_map, box_forward)
            weight = self.box(box_feature)
            # fuse the weighting map with the conv5 feature for the current class
            masked_feature = torch.mul(weight.expand(batch_sz, ch, sp_sz, sp_sz), conv5)
            # add the fused feature back onto the conv5 feature
            enhanced_feature = torch.add(masked_feature, conv5)
            # feed the combined feature of the current class through the
            # shared convolution module
            cls_feature = self.feature_gap(enhanced_feature)
            # record the 1x14x14 feature for the current class
            cls_features.append(cls_feature)
            # record the pooled score for the current class
            cls_scores.append(self.gap(cls_feature).view(cls_feature.size(0), -1))
        # cls_scores feed the softmax loss; cls_features serve as the
        # weighting maps ('pre_maps') for the next call of forward, like an RNN
        return torch.cat(cls_scores, 1), torch.cat(cls_features, 1)
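Note that hidden_maps needs an initial value before the very first forward call. A hedged sketch, assuming all-zero maps of shape batchsize x m x 14 x 14 (the zero initialization is a placeholder, not necessarily the right scheme):

hidden_maps = torch.zeros(batch_size, num_classes, 14, 14)  # placeholder init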
# loss code
output, hidden_maps = model(input_var, hidden_maps)
loss = F.multilabel_soft_margin_loss(output, target_var)
loss.backward()
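For completeness, here is a sketch of the usual pattern for state carried across iterations, under the assumption that feeding the previous iteration’s cls_features back in as pre_maps is what triggers the second-backward error (the carried maps still belong to the already-freed graph of the previous iteration):

output, hidden_maps = model(input_var, hidden_maps)
loss = F.multilabel_soft_margin_loss(output, target_var)
loss.backward()
hidden_maps = hidden_maps.detach()  # truncated-BPTT style: cut the cross-iteration graph link

The alternative is loss.backward(retain_graph=True), which keeps the buffers alive at the cost of memory growing with the number of chained iterations.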