I am writing YOLOv2 code in PyTorch.
I want a softmax layer to be the last layer of the classification head, but when I use softmax as the last layer, I get the error below and I can't find what is wrong with it.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [11, 13, 13, 1, 2]], which is output 0 of SliceBackward, is at version 3; expected version 2 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
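As the hint suggests, anomaly detection can be switched on before running the model so that the traceback points at the forward operation whose gradient computation failed:

import torch
torch.autograd.set_detect_anomaly(True)  # report the forward op that broke the backward pass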
Here is the code:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class yolov2(nn.Module):
    def __init__(self, b, c):
        super(yolov2, self).__init__()
        self.b = b  # anchor boxes per grid cell
        self.c = c  # number of classes
        self.first_net = nn.Sequential(
            # conv1
            # 3 @ 416 * 416
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            # 32 @ 416 * 416
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # 32 @ 208 * 208
            # conv2
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            # 64 @ 208 * 208
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # 64 @ 104 * 104
            # conv3
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            # 128 @ 104 * 104
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
            # conv4
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=1, stride=1, padding=0),
            # 64 @ 104 * 104
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            # conv5
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            # 128 @ 104 * 104
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # 128 @ 52 * 52
            # conv6
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            # 256 @ 52 * 52
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            # conv7
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=0),
            # 128 @ 52 * 52
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
            # conv8
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            # 256 @ 52 * 52
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # 256 @ 26 * 26
            # conv9
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            # 512 @ 26 * 26
            nn.BatchNorm2d(512),
            nn.LeakyReLU(),
            # conv10
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0),
            # 256 @ 26 * 26
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            # conv11
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            # 512 @ 26 * 26
            nn.BatchNorm2d(512),
            nn.LeakyReLU(),
            # conv12
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0),
            # 256 @ 26 * 26
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            # conv13
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            # 512 @ 26 * 26
            nn.BatchNorm2d(512),
            nn.LeakyReLU()
        )
        self.second_net = nn.Sequential(
            # 512 @ 26 * 26
            # conv14
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 26 * 26
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(),
            # conv15
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=0),
            # 512 @ 26 * 26
            nn.BatchNorm2d(512),
            nn.LeakyReLU(),
            # conv16
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 26 * 26
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # 1024 @ 13 * 13
            # conv17
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=0),
            # 512 @ 13 * 13
            nn.BatchNorm2d(512),
            nn.LeakyReLU(),
            # conv18
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 13 * 13
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(),
            # conv19
            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 13 * 13
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(),
            # conv20
            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 13 * 13
            nn.BatchNorm2d(1024),
            nn.LeakyReLU()
        )
        self.third_net = nn.Sequential(
            # 3072 @ 13 * 13 (2048 from the passthrough + 1024 from second_net)
            # conv21
            nn.Conv2d(in_channels=3072, out_channels=1024, kernel_size=3, stride=1, padding=1),
            # 1024 @ 13 * 13
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(),
            # conv22
            nn.Conv2d(in_channels=1024, out_channels=(5 + self.c) * self.b, kernel_size=1, stride=1, padding=0),
            # 125 @ 13 * 13 (for b=5, c=20)
            nn.LeakyReLU()
        )
        self.first_net.cuda()
        self.second_net.cuda()
        self.third_net.cuda()
        self._initialize_weights()
    def pass_layer(self, x):
        # passthrough (reorg) layer: 512 @ 26 * 26 -> 2048 @ 13 * 13
        stride = 2
        batch_size, channels, height, width = x.size()  # e.g. 32, 512, 26, 26
        new_ht = int(height / stride)  # 13
        new_wd = int(width / stride)   # 13
        new_channels = channels * stride * stride  # 2048
        passthrough = x.permute(0, 2, 3, 1)  # 32 * 26 * 26 * 512
        passthrough = passthrough.contiguous().view(-1, new_ht, stride, new_wd, stride, channels)  # 32 * 13 * 2 * 13 * 2 * 512
        passthrough = passthrough.permute(0, 1, 3, 2, 4, 5)  # 32 * 13 * 13 * 2 * 2 * 512
        passthrough = passthrough.contiguous().view(-1, new_ht, new_wd, new_channels)  # 32 * 13 * 13 * 2048
        passthrough = passthrough.permute(0, 3, 1, 2)  # 32 * 2048 * 13 * 13
        return passthrough
    def forward(self, x, anchor_box, b, c):
        # print('input network size')
        # print(np.shape(x))
        output = self.first_net(x)
        # 512 @ 26 * 26
        passthrough = self.pass_layer(output)
        # 2048 @ 13 * 13
        output = self.second_net(output)
        # 1024 @ 13 * 13
        output = torch.cat([passthrough, output], 1)
        # 3072 @ 13 * 13
        output = self.third_net(output)
        # 125 @ 13 * 13
        output = output.permute(0, 2, 3, 1)  # [batch_size, 13, 13, 125]
        output = output.view(output.size()[0], output.size()[1], output.size()[2], self.b, 5 + self.c)
        # batch * 13 * 13 * 5 * 25
        # output = torch.sigmoid(output)
        # These three slice assignments modify `output` in place, which seems to be
        # what the SliceBackward in the error message refers to.
        output[:, :, :, :, 2:4] = torch.sigmoid(output[:, :, :, :, 2:4])
        output[:, :, :, :, 4] = torch.sigmoid(output[:, :, :, :, 4])
        output[:, :, :, :, 5:] = F.softmax(output[:, :, :, :, 5:], dim=-1)  # softmax over the class scores
        # output[:, :, :, :, 5:] = torch.sigmoid(output[:, :, :, :, 5:])
        output = output.view(output.size()[0], output.size()[1], output.size()[2], -1)
        return output
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
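For context, my understanding from the error text is that the problem comes from the slice assignments in forward() modifying `output` in place. A minimal, untested sketch of a non-in-place variant I am considering, which just rebuilds the tensor with torch.cat and keeps my index layout from above:

# Untested sketch: build a new tensor instead of writing into slices of `output`.
rest = output[..., 0:2]                    # entries left untouched
xy = torch.sigmoid(output[..., 2:4])       # same 2:4 slice as above
conf = torch.sigmoid(output[..., 4:5])     # 4:5 keeps the dimension for the cat
cls = F.softmax(output[..., 5:], dim=-1)   # class scores
output = torch.cat([rest, xy, conf, cls], dim=-1)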
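For completeness, a minimal way to exercise the forward pass looks like this (b=5 and c=20 are my assumptions, matching the "125 @ 13 * 13" comment since 125 = 5 * (5 + 20); a CUDA device is required because __init__ moves the sub-networks to the GPU):

model = yolov2(b=5, c=20)
dummy = torch.randn(2, 3, 416, 416).cuda()
out = model(dummy, anchor_box=None, b=5, c=20)  # anchor_box, b, c are unused inside forward()
print(out.size())  # torch.Size([2, 13, 13, 125])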