I have a similar problem as this one post. I tried to follow the solution and choose to remove the nn.sigmoid() on my last layer. And now it has changed from previously same loss at every epoch into a very small updates on every epoch. Here is some epoch result
Epoch: 1 Classification Loss : 11.535951576232911
Epoch: 2 Classification Loss : 11.432335233688354
Epoch: 3 Classification Loss : 11.466874022483825
Epoch: 4 Classification Loss : 11.46687402009964
And when I check the weight different between each epoch by printing the last layer of my classification convolution layer, it still output 0
class Decoder_cls(nn.Module):
def __init__(self, image_size=64, conv_in=64, c_dim=4, repeat_num=5):
super(Decoder_cls, self).__init__()
layers = []
conv_dim = conv_in*2
layers.append(nn.Conv2d(conv_in, conv_dim, kernel_size=4, stride=2, padding=1))
layers.append(nn.LeakyReLU(0.01))
curr_dim = conv_dim
for i in range(1, repeat_num):
layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1))
layers.append(nn.LeakyReLU(0.01))
curr_dim = curr_dim * 2
kernel_size = int(image_size / np.power(2, repeat_num))
self.main = nn.Sequential(*layers)
self.conv2 = nn.Conv2d(curr_dim, c_dim, kernel_size=kernel_size, bias=False)
def forward(self, x):
h = self.main(x)
out_cls = self.conv2(h)
return out_cls.view(out_cls.size(0), out_cls.size(1))
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.encoder = Encoder()
self.bottleneck= Bottleneck(256,256)
self.decoder_img = Decoder_img()
#self.decoder_color = Decoder_color()
self.decoder_cls = Decoder_cls()
def forward(self, x):
out = self.encoder(x)
out = self.bottleneck(out)
#out_cls = self.decoder_cls(out)
out_cls = self.decoder_cls(torch.narrow(out, 1, 0, 64))
#print(out_cls.size(),out_cls)
out_cls -= out_cls.min(1, keepdim=True)[0]
out_cls /= out_cls.max(1, keepdim=True)[0]
out_cls = torch.floor(out_cls)
c = out_cls.view(out_cls.size(0), out_cls.size(1), 1, 1)
c = c.repeat(1, 1, out.size(2), out.size(3))
color_in = torch.cat([out, c], dim=1)
out_img = self.decoder_img(color_in)
return out_img,out_cls
I tried to print(generator.decoder_cls.conv2.weight.grad)
but the result are all like this.
tensor([[[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
...,
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]]],
[[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
...,
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]]],
[[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
...,
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]]],
[[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
...,
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]],
[[0., 0.],
[0., 0.]]]], device='cuda:0')
I will appreciate any help and advice to fix this problem