Dynamically changing layers not working

I am trying to implement a PGGAN (progressively growing GAN) and have the following code for my discriminator model:

# New D model
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
                
        # First conv corresponds to fromRGB
        self.fromRGB = None
        #self.fromRGB = from_to_RGB(in_channels=3, out_channels=16)
        
        self.block1 = None
        self.block2 = None
        self.block3 = None
        self.block4 = None
        self.block5 = None
        self.block6 = None
        self.block7 = None
        # If none we add the model in the forward pass? 
        # issue remains of last layer and the FC layer
        
        # Now let's try and do this dynamically using a list?
        
        
        # FC output layer
        # I will hardcode the different FC layers
        self.FC1 = None
        
        # The output sigmoid
        self.sigmoid = nn.Sigmoid()
        
    # We can use this method to reset the FC layer after we complete training for each block
    def reset_output_layer(self):
        self.FC1 = None
    
    def set_fromRGB(self):
        self.fromRGB = None
        
    # def activate_layer(self, layer)  Perhaps this is a better way to init the layers?
    
    def forward(self, x, layer_num=1, alpha=0):
        # This is the fromRGB transform
        #print(x.shape)
        
        # For now, instead of inferring the number of input channels expected by the layer after fromRGB
        # Let's hardcode it with a list and we will reinitialise the fromRGB layer each time we grow the network
        # until we reach layer 7 at which point fromRGB will be locked in
        expected_channels = [512, 512, 256, 128, 64, 32, 16]
        if alpha == 0:
            self.fromRGB = None
            self.fromRGB = from_to_RGB(in_channels=3, out_channels=expected_channels[layer_num-1]).to(x.device)
        x = self.fromRGB(x)
        
        #if layer_num == 3:
            #print(f'X after fromRGB: {x.shape}')
        
        # Block 7
        if layer_num >= 7:
            #print("BLOCK 7 ACTIVE")
            if self.block1 is None:
                self.block1 = d_conv_block(in_channels=16, out_channels=32, kernel_size1=(3,3)).to(x.device)
            x = self.block1(x)    
        # Block 6
        if layer_num >= 6:
            #print("BLOCK 6 ACTIVE")
            if self.block2 is None:
                self.block2 = d_conv_block(in_channels=32, out_channels=64, kernel_size1=(3,3)).to(x.device)
            x = self.block2(x)
        # Block 5
        if layer_num >= 5:
            #print("BLOCK 5 ACTIVE")
            if self.block3 is None:
                self.block3 = d_conv_block(in_channels=64, out_channels=128, kernel_size1=(3,3)).to(x.device)
            x = self.block3(x)        
        # Block 4
        if layer_num >= 4:
            #print("BLOCK 4 ACTIVE")
            if self.block4 is None:
                self.block4 = d_conv_block(in_channels=128, out_channels=256, kernel_size1=(3,3)).to(x.device)
            x = self.block4(x)
        # Block 3
        if layer_num >= 3:
            #print("BLOCK 3 ACTIVE")
            if self.block5 is None:
                self.block5 = d_conv_block(in_channels=256, out_channels=512, kernel_size1=(3,3)).to(x.device)
            x = self.block5(x)        
        # Block 2
        if layer_num >= 2:
            #print("BLOCK 2 ACTIVE")
            if self.block6 is None:
                self.block6 = d_conv_block(in_channels=512, out_channels=512, kernel_size1=(3,3)).to(x.device)
            x = self.block6(x)        
        # Block 1
        if layer_num >= 1:
            #print("BLOCK 1 ACTIVE")
            if self.block7 is None:
                self.block7 = d_conv_block(in_channels=512, out_channels=512, kernel_size1=(3,3), kernel_size2=(4,4)).to(x.device)
            x = self.block7(x)
        
        # Last FC layer
        x = x.view(x.size(0), -1) # Reshape the output, i.e. flatten it 
        self.FC1 = d_output_layer(x.size(1)).to(x.device)
        #print(x.shape)
        x = self.FC1(x)
        
        # The output has to be passed through a sigmoid layer for our BCELoss
        x = self.sigmoid(x)
        
        return x

final_d = Discriminator().to(device)

This part of the code:

expected_channels = [512, 512, 256, 128, 64, 32, 16]
if alpha == 0:
    self.fromRGB = None
    self.fromRGB = from_to_RGB(in_channels=3, out_channels=expected_channels[layer_num-1]).to(x.device)
x = self.fromRGB(x)

raises an error when I switch alpha from 0 to 0.05:

RuntimeError: Given groups=1, weight of size [256, 256, 3, 3], expected input[32, 512, 16, 16] to have 256 channels, but got 512 channels instead

It seems that when alpha changes to 0.05, the fromRGB layer reverts to the old one, even though I reset fromRGB to match the currently expected output channels every time alpha == 0?

Hi YM!

You haven’t shown us what from_to_RGB(), d_conv_block(), or
d_output_layer() does, nor where you call forward() with various
values of layer_num and alpha, so I can’t really say one way or the other.
But I suspect that you may not have located the actual place where your
error occurs.

weight of size [256, 256, 3, 3] suggests that you’re passing a tensor
to a Conv2d (in_channels = 256, out_channels = 256, kernel = 3),
but I don’t see any such Conv2d in your (incomplete) code.

I would suggest printing out the .shape of x and the .weight shapes of
your self.blockN convolutions as you progress through forward(), both to see
where the error occurs and, more generally, to see what is going on.

As an aside, there is nothing logically “wrong” with modifying your layers
in forward(), but it seems like bad design to me because, among
other reasons, it violates what we sometimes call “separation of concerns.”

Best.

K. Frank

Hey @KFrank!

Thanks for your reply! I’ll include those functions here:

def d_conv_block(in_channels, out_channels, kernel_size1=None, kernel_size2=None):
    if kernel_size2 is not None:
        block = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size1, padding=(1,1)),
            nn.BatchNorm2d(in_channels, affine=False),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels, out_channels, kernel_size2, padding=(1,1)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
            # Downsample
            nn.AvgPool2d(kernel_size=(2,2)),
        )
    else:
        block = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size1, padding=(1,1)),
            nn.BatchNorm2d(in_channels, affine=False),
            nn.LeakyReLU(0.2),
            nn.Conv2d(in_channels, out_channels, kernel_size1, padding=(1,1)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
            # Downsample
            nn.AvgPool2d(kernel_size=(2,2)),
        )
    
    return block

# Here we remove the nn.Upsample and it will be done externally
def g_conv_block(in_channels, out_channels, kernel_size1=None, kernel_size2=None, upsample=False):
    if upsample:
        block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size1, padding=(1,1)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
            nn.Conv2d(out_channels, out_channels, kernel_size1, padding=(1,1)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
        )
    else:
        block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size1, padding=(3,3)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
            nn.Conv2d(out_channels, out_channels, kernel_size2, padding=(1,1)),
            nn.BatchNorm2d(out_channels, affine=False),
            nn.LeakyReLU(0.2),
        )
    
    return block

# Create a function to create the output layer?
def d_output_layer(input_dim):
    layer = nn.Linear(input_dim, 1)
    return layer

def from_to_RGB(in_channels, out_channels):
    block = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=(1,1)),
        nn.LeakyReLU(0.2),
    )
    return block

But yes, you were right, the issue wasn’t with this code. I have got it working now; there was a line of code in my training loop that was wrong. When I made the second call to the discriminator, for the generated images, I had failed to pass the alpha and layer_num arguments.
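
Roughly speaking, the broken versus fixed second call looked like this (fake_imgs, current_layer and alpha are just the placeholder names I'm using here):

# second discriminator call, on the generated images
# before (wrong): silently falls back to the defaults layer_num=1, alpha=0,
# which also re-created fromRGB with the wrong out_channels for the next call
d_fake = final_d(fake_imgs)

# after (fixed): pass the same growth parameters as the call on the real images
d_fake = final_d(fake_imgs, layer_num=current_layer, alpha=alpha)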

Also, regarding modifying the layers in forward(): I see why this is an issue, but I was a bit stumped as to where else to make the changes. Would you recommend anything (perhaps I should move the logic to a separate method)?

Thanks

YM

Hi YM!

Off the top of my head, I think I would package that logic in a method
of Discriminator, something like add_layer_to_model() or such.

The basic idea is that modifying your model (add_layer_to_model())
and performing a forward pass (forward()) are two different things, so
your code is better organized if you keep them separate in two different
methods.
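
As a very rough sketch (the channel sizes and helper calls below just mirror
the ones you already have in forward(); treat it as an outline rather than
working code):

# sketch of a method on Discriminator that grows the model outside of forward()
def add_layer_to_model(self, layer_num, device):
    # channel sizes mirror the hard-coded list in your forward()
    expected_channels = [512, 512, 256, 128, 64, 32, 16]
    self.fromRGB = from_to_RGB(in_channels=3, out_channels=expected_channels[layer_num - 1]).to(device)
    if layer_num >= 7 and self.block1 is None:
        self.block1 = d_conv_block(in_channels=16, out_channels=32, kernel_size1=(3, 3)).to(device)
    if layer_num >= 6 and self.block2 is None:
        self.block2 = d_conv_block(in_channels=32, out_channels=64, kernel_size1=(3, 3)).to(device)
    # ... and so on down to block7 ...

Then your training loop calls final_d.add_layer_to_model(...) once each time
you grow the network, and forward() just uses whichever blocks are not None.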

Best.

K. Frank

Hi KFrank,

Okay, I think I get you. I appreciate the help and I’ll try to implement it this way :slight_smile:

Thanks,

YM