With ReLU(inplace=True), my model cannot be trained: the loss blows up to hundreds of thousands after a few iterations. But when I replace it with ReLU(inplace=False), all the trouble disappears and the loss converges gradually.
PyTorch never complained about my in-place ReLU usage; everything seemed fine except the larger and larger training loss. I have heard people say "if PyTorch doesn't give you an error or warning about an in-place operation, it is almost certainly working correctly." But something clearly goes wrong internally, so why does this happen?
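For reference, this is the kind of in-place misuse that autograd normally does catch. A minimal sketch, unrelated to my model: sigmoid saves its output for the backward pass, so overwriting that output in place trips the tensor's version counter and backward raises a RuntimeError.

import torch
import torch.nn.functional as F

x = torch.randn(4, requires_grad=True)
y = torch.sigmoid(x)       # sigmoid saves its output y for backward (grad = grad_out * y * (1 - y))
F.relu(y, inplace=True)    # overwrites y in place, bumping its version counter
y.sum().backward()         # RuntimeError: a variable needed for gradient computation
                           # has been modified by an inplace operation

My model raises no such error, which is exactly why the exploding loss surprises me.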
Below is my model code:
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, ch_in, ch_out, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(ch_in, 128, 3, 1, 1),  # in_channels, out_channels, kernel_size, stride, padding; hidden width fixed at 128
            nn.ReLU(inplace=False),
            nn.Conv2d(128, ch_out, 3, 1, 1),
            nn.ReLU(inplace=False),
        )
        self.right = shortcut

    def forward(self, x):
        out = self.left(x)
        residual = x if self.right is None else self.right(x)
        out += residual  # note: this addition is itself in-place
        return F.relu(out, inplace=False)
# BasicModule and weights_initialization are defined elsewhere in my project
class ContentWeightedCNN(BasicModule):
    def __init__(self, use_imp=True):
        super(ContentWeightedCNN, self).__init__()
        self.model_name = 'ContentWeightedCNN'
        self.use_imp = use_imp
        self.encoder = self.make_encoder()
        self.decoder = self.make_decoder()
        self.reset_parameters()

    def reset_parameters(self):
        self.apply(weights_initialization)

    def forward(self, x):
        enc_data = self.encoder(x)
        dec_data = self.decoder(enc_data)
        return dec_data
    def make_encoder(self):
        layers = [
            nn.Conv2d(3, 128, 8, 4, 2),
            nn.ReLU(inplace=True),  # 54
            ResidualBlock(128, 128),
            nn.Conv2d(128, 256, 4, 2, 1),  # 115
            nn.ReLU(inplace=True),
            ResidualBlock(256, 256),
            nn.Conv2d(256, 256, 3, 1, 1),  # 192
            nn.ReLU(inplace=True),
            ResidualBlock(256, 256),
            nn.Conv2d(256, 64, 1, 1, 0),  # conv4: 64 is n
            nn.Sigmoid(),
        ]
        return nn.Sequential(*layers)
    def make_decoder(self):
        layers = [
            nn.Conv2d(64, 512, 3, 1, 1),
            nn.ReLU(inplace=True),
            ResidualBlock(512, 512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(inplace=True),
            ResidualBlock(512, 512),
            nn.PixelShuffle(2),  # 512 channels -> 128, spatial x2
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.ReLU(inplace=True),
            ResidualBlock(256, 256),
            nn.PixelShuffle(4),  # 256 channels -> 16, spatial x4
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 3, 1, 1, 0),
        ]
        return nn.Sequential(*layers)
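To isolate the effect of the flag itself, here is a minimal sketch of the comparison I ran, assuming the ContentWeightedCNN class above is importable (it depends on BasicModule and weights_initialization from my project) and using a dummy batch; set_relu_inplace is just a small helper I wrote for this test, not a library function:

import copy
import torch
import torch.nn as nn

def set_relu_inplace(model, flag):
    # helper for this test: toggle the inplace flag on every nn.ReLU module
    for m in model.modules():
        if isinstance(m, nn.ReLU):
            m.inplace = flag

torch.manual_seed(0)
base = ContentWeightedCNN()                    # the model defined above
runs = {'inplace=True': copy.deepcopy(base),
        'inplace=False': copy.deepcopy(base)}  # identical initial weights
set_relu_inplace(runs['inplace=True'], True)
set_relu_inplace(runs['inplace=False'], False)

x = torch.randn(2, 3, 128, 128)                # dummy batch (my real data differs)
target = x.clone()                             # reconstruction target
criterion = nn.MSELoss()

for name, model in runs.items():
    opt = torch.optim.SGD(model.parameters(), lr=0.01)
    for step in range(10):
        opt.zero_grad()
        loss = criterion(model(x), target)
        loss.backward()
        opt.step()
        print(name, step, loss.item())

The inplace=True run reproduces the blow-up described above while inplace=False converges. A next step might be to wrap one training step in torch.autograd.set_detect_anomaly(True) to see whether backward flags anything, but I would still like to understand why the in-place ReLU changes the result at all.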