Hi,
I am using the following generator model for a project, which is similar to DCGAN tutorial. The only difference is that I have added a couple of Residual Blocks in the beginning. In train mode, everything works fine and proper results are generated. However, if I set the model to eval mode using .eval(), then the model generates NaN output.
I have narrowed it down to an issue in the residual block, but I am not sure why NaN is being generated. I am suspecting that the issue might be due to the Instance Norm layer. Could someone clarify why this could be happening?
Note: Input to the generator is a Bx40x1x1 tensor, which consists of 1s ans 0s.
class ResidualBlock(nn.Module):
"""Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out):
super(ResidualBlock, self).__init__()
self.main = nn.Sequential(
nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=1, padding=1, bias=False),
nn.InstanceNorm2d(dim_out, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
nn.Conv2d(dim_out, dim_out, kernel_size=3, stride=1, padding=1, bias=False),
nn.InstanceNorm2d(dim_out, affine=True, track_running_stats=True))
def forward(self, x):
return x + self.main(x)
class Generator(nn.Module):
def __init__(self, in_dim=40, conv_dim=64, out_dim=3):
super(Generator, self).__init__()
self.feat_extractor = nn.Sequential(
# Residual Blocks
ResidualBlock(in_dim, in_dim),
ResidualBlock(in_dim, in_dim),
ResidualBlock(in_dim, in_dim),
ResidualBlock(in_dim, in_dim),
ResidualBlock(in_dim, in_dim),
ResidualBlock(in_dim, in_dim),
# input is Z, going into a convolution
nn.ConvTranspose2d(in_channels=in_dim, out_channels=conv_dim*8,
kernel_size=4, stride=1, padding=0, bias=False),
nn.BatchNorm2d(conv_dim * 8),
nn.ReLU(True),
# state size. (conv_dim*8) x 4 x 4
nn.ConvTranspose2d(in_channels=conv_dim*8, out_channels=conv_dim*4,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(conv_dim * 4),
nn.ReLU(True),
# state size. (conv_dim*4) x 8 x 8
nn.ConvTranspose2d(in_channels=conv_dim*4, out_channels=conv_dim*2,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(conv_dim * 2),
nn.ReLU(True),
# state size. (conv_dim*2) x 16 x 16
nn.ConvTranspose2d(in_channels=conv_dim*2, out_channels=conv_dim,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(conv_dim),
nn.ReLU(True),
)
# state size. (conv_dim) x 32 x 32
self.img_gen = nn.Sequential(
nn.ConvTranspose2d(in_channels=conv_dim, out_channels=out_dim,
kernel_size=4, stride=2, padding=1, bias=False),
nn.Tanh()
)
# state size. (out_dim) x 64 x 64
def forward(self, input):
return self.img_gen(self.feat_extractor(input))