Unexpected non-trainable params

Hi, in my model I only set requires_grad to False for the Haar transform weights, but the parameter summary reports the whole ModuleList's parameters as non-trainable, and I can't find the reason.

class RNVP(nn.Module):

    def __init__(self, dims_in, message_length, diffusion_length=256, down_num=3, block_num=[4, 4, 6]):
        super(RNVP, self).__init__()

        # diffusion block
        self.in_channel = 3
        self.dims_in = dims_in
        self.H = dims_in[0][1]
        self.W = dims_in[0][2]
        self.diffusion_length = diffusion_length
        self.diffusion_size = int(self.diffusion_length ** 0.5)

        self.linear1 = nn.Linear(message_length, self.diffusion_length)
        self.linear2 = nn.Linear(message_length, self.diffusion_length)
        self.linear3 = nn.Linear(message_length, self.diffusion_length)
        
        self.msg_up1 = ExpandNet(1, 1, 3)
        self.msg_up2 = ExpandNet(1, 1, 3)
        self.msg_up3 = ExpandNet(1, 1, 3)

        self.linear_rev1 = nn.Linear(self.H*self.W, message_length)
        self.linear_rev2 = nn.Linear(self.H*self.W, message_length)
        self.linear_rev3 = nn.Linear(self.H*self.W, message_length)

        self.HaarDown = HaarDownsampling(dims_in)

        # RNVP in UNet
        operations = []

        # down
        current_dims = dims_in
        for i in range(down_num):
            if i != 0:
                b = HaarDownsampling(current_dims)
                operations.append(b)

                current_dims[0][0] = current_dims[0][0] * 4
                current_dims[0][1] = current_dims[0][1] // 2
                current_dims[0][2] = current_dims[0][2] // 2

            else:
                current_dims[0][0] = current_dims[0][0] * 4 * 2
                current_dims[0][1] = current_dims[0][1] // 2
                current_dims[0][2] = current_dims[0][2] // 2
            
            for j in range(block_num[i]):
                b = RNVPCouplingBlock(current_dims, subnet_constructor=ResidualDenseBlock, clamp=1.0)
                # b = InvBlock()
                operations.append(b)
        
        # up
        block_num = block_num[:-1][::-1]
        block_num.append(0)
        for i in range(down_num):
            if i != 2:
                b = HaarUpsampling(current_dims)
                operations.append(b)
            
            current_dims[0][0] = current_dims[0][0] // 4
            current_dims[0][1] = current_dims[0][1] * 2
            current_dims[0][2] = current_dims[0][2] * 2
            for j in range(block_num[i]):
                b = RNVPCouplingBlock(current_dims, subnet_constructor=ResidualDenseBlock, clamp=1.0)
                # b = InvBlock()
                operations.append(b)

        self.operations = nn.ModuleList(operations)

Here is the summary output; the parenthesized counts are the non-trainable parameters:

======================================================================
Layer (type:depth-idx)                        Param #
======================================================================
├─Model: 1-1                                  --
|    └─RNVP: 2-1                              --
|    |    └─Linear: 3-1                       7,936
|    |    └─Linear: 3-2                       7,936
|    |    └─Linear: 3-3                       7,936
|    |    └─ExpandNet: 3-4                    21
|    |    └─ExpandNet: 3-5                    21
|    |    └─ExpandNet: 3-6                    21
|    |    └─Linear: 3-7                       491,550
|    |    └─Linear: 3-8                       491,550
|    |    └─Linear: 3-9                       491,550
|    |    └─HaarDownsampling: 3-10            (48)
|    |    └─ModuleList: 3-11                  (28,595,840)
======================================================================
Total params: 30,094,409
Trainable params: 1,498,521
Non-trainable params: 28,595,888
======================================================================
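
My expectation was that freezing a single tensor only flips that tensor's flag and leaves all sibling parameters trainable, as in this toy sketch (not my actual model):

import torch.nn as nn

ml = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 4)])
ml[0].weight.requires_grad = False  # freeze a single tensor
print([p.requires_grad for p in ml.parameters()])
# prints: [False, True, True, True]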

Your code is unfortunately incomplete and thus not executable. Could you post a minimal, executable code snippet to reproduce the issue, please?

import torch
import torch.nn as nn
import numpy as np
from torchsummary import summary
import torch.nn.functional as F


class HaarDownsampling(nn.Module):

    def __init__(self, dims_in):
        super().__init__()

        self.in_channels = dims_in[0][0]
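        # four fixed 2x2 Haar analysis filters: average, horizontal, vertical and diagonal detail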
        self.haar_weights = torch.ones(4, 1, 2, 2)

        self.haar_weights[1, 0, 0, 1] = -1
        self.haar_weights[1, 0, 1, 1] = -1

        self.haar_weights[2, 0, 1, 0] = -1
        self.haar_weights[2, 0, 1, 1] = -1

        self.haar_weights[3, 0, 1, 0] = -1
        self.haar_weights[3, 0, 0, 1] = -1

        self.haar_weights = torch.cat([self.haar_weights]*self.in_channels, 0)
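        # wrap the replicated filters as an nn.Parameter and freeze them
        # (the only requires_grad = False in the whole model)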
        self.haar_weights = nn.Parameter(self.haar_weights)
        self.haar_weights.requires_grad = False


class HaarUpsampling(nn.Module):

    def __init__(self, dims_in):
        super().__init__()

        self.in_channels = dims_in[0][0] // 4
        self.haar_weights = torch.ones(4, 1, 2, 2)

        self.haar_weights[1, 0, 0, 1] = -1
        self.haar_weights[1, 0, 1, 1] = -1

        self.haar_weights[2, 0, 1, 0] = -1
        self.haar_weights[2, 0, 1, 1] = -1

        self.haar_weights[3, 0, 1, 0] = -1
        self.haar_weights[3, 0, 0, 1] = -1

        self.haar_weights = torch.cat([self.haar_weights]*self.in_channels, 0)
        self.haar_weights = nn.Parameter(self.haar_weights)
        self.haar_weights.requires_grad = False


class ResidualDenseBlock(nn.Module):
    def __init__(self, in_channels, out_channels, bias=True):
        super(ResidualDenseBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, 3, 1, 1, bias=bias)
        self.conv2 = nn.Conv2d(in_channels + 32, 32, 3, 1, 1, bias=bias)
        self.conv3 = nn.Conv2d(in_channels + 2 * 32, 32, 3, 1, 1, bias=bias)
        self.conv4 = nn.Conv2d(in_channels + 3 * 32, 32, 3, 1, 1, bias=bias)
        self.conv5 = nn.Conv2d(in_channels + 4 * 32, out_channels, 3, 1, 1, bias=bias)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)


class RNVPCouplingBlock(nn.Module):

    def __init__(self, dims_in, subnet_constructor=ResidualDenseBlock, clamp=1.0):
        super().__init__()

        channels = dims_in[0][0]
        self.ndims = len(dims_in[0])
        self.split_len1 = channels // 2
        self.split_len2 = channels - channels // 2

        self.clamp = clamp
        self.affine_eps = 0.0001

        self.s1 = subnet_constructor(self.split_len1, self.split_len2)
        self.t1 = subnet_constructor(self.split_len1, self.split_len2)
        self.s2 = subnet_constructor(self.split_len2, self.split_len1)
        self.t2 = subnet_constructor(self.split_len2, self.split_len1)


class RNVP(nn.Module):

    def __init__(self, dims_in, down_num=3, block_num=[4, 4, 6]):
        super(RNVP, self).__init__()

        # RNVP in UNet
        operations = []

        # down
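        # note: current_dims aliases dims_in, so the shape updates below mutate the caller's list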
        current_dims = dims_in
        for i in range(down_num):
            if i != 0:
                b = HaarDownsampling(current_dims)
                operations.append(b)

                current_dims[0][0] = current_dims[0][0] * 4
                current_dims[0][1] = current_dims[0][1] // 2
                current_dims[0][2] = current_dims[0][2] // 2

            else:
                current_dims[0][0] = current_dims[0][0] * 4 * 2
                current_dims[0][1] = current_dims[0][1] // 2
                current_dims[0][2] = current_dims[0][2] // 2
            
            for j in range(block_num[i]):
                b = RNVPCouplingBlock(current_dims, subnet_constructor=ResidualDenseBlock, clamp=1.0)
                # b = InvBlock()
                operations.append(b)
        
        # up
        block_num = block_num[:-1][::-1]
        block_num.append(0)
        for i in range(down_num):
            if i != 2:
                b = HaarUpsampling(current_dims)
                operations.append(b)
            
            current_dims[0][0] = current_dims[0][0] // 4
            current_dims[0][1] = current_dims[0][1] * 2
            current_dims[0][2] = current_dims[0][2] * 2
            for j in range(block_num[i]):
                b = RNVPCouplingBlock(current_dims, subnet_constructor=ResidualDenseBlock, clamp=1.0)
                # b = InvBlock()
                operations.append(b)

        self.operations = nn.ModuleList(operations)

class Model(nn.Module):
    def __init__(self, dims, channels=64, diffusion_length=256):
        super(Model, self).__init__()

        self.model = RNVP(dims)

    # def forward(self, cover, msg, rev=False):
    #     if not rev:
    #         out = self.model(cover, msg)

    #     else:
    #         out = self.model(cover, msg, rev=True)

    #     return out

def main():

    net = Model([[3,128,128]])
    # net = RNVP([[3,128,128]])

    net = torch.nn.DataParallel(net, device_ids=[0])
    net.cuda()

    summary(net, input_size=[(3, 128, 128),(64, 1)], batch_size=2)


if __name__ == "__main__":
    main()

Hi, this code snippet can reproduce the issue. I also found that when I instantiate RNVP directly, all parameters are trainable, which is what I expect. But when I instantiate Model, all parameters are reported as non-trainable.
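
Concretely, this is how I compare the two (a minimal sketch; the "non-trainable" observation comes from the summary output shown above):

rnvp = RNVP([[3, 128, 128]])    # summary reports these parameters as trainable
model = Model([[3, 128, 128]])  # summary reports all parameters as non-trainable
for tag, m in [("RNVP", rnvp), ("Model", model)]:
    n_trainable = sum(p.numel() for p in m.parameters() if p.requires_grad)
    print("{}: {} trainable params".format(tag, n_trainable))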

Thanks for the code update.
I cannot reproduce the issue and using:

for name, param in net.named_parameters():
    print("param {}, is trainable {}".format(name, param.requires_grad))

after initializing the model gives:

param module.model.operations.0.s1.conv1.weight, is trainable True
param module.model.operations.0.s1.conv1.bias, is trainable True
param module.model.operations.0.s1.conv2.weight, is trainable True
param module.model.operations.0.s1.conv2.bias, is trainable True
param module.model.operations.0.s1.conv3.weight, is trainable True
param module.model.operations.0.s1.conv3.bias, is trainable True
param module.model.operations.0.s1.conv4.weight, is trainable True
param module.model.operations.0.s1.conv4.bias, is trainable True
param module.model.operations.0.s1.conv5.weight, is trainable True
param module.model.operations.0.s1.conv5.bias, is trainable True
param module.model.operations.0.t1.conv1.weight, is trainable True
param module.model.operations.0.t1.conv1.bias, is trainable True
...

which indicates that the operations nn.ModuleList contains trainable submodules and parameters.
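
As an additional sanity check that does not depend on the summary tool, you could count the parameters via their requires_grad attribute directly (a small sketch, reusing your net):

n_trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
n_frozen = sum(p.numel() for p in net.parameters() if not p.requires_grad)
print("trainable: {}, frozen: {}".format(n_trainable, n_frozen))

If these numbers disagree with the summary table, the mismatch comes from how the summary tool displays the counts, not from the model itself.

As a side note: since the Haar filters are fixed, you could also register them as a buffer instead of a frozen nn.Parameter. A buffer is moved by .to()/.cuda() and stored in the state_dict, but is never returned by parameters(), so it cannot appear in any (non-)trainable parameter count. A sketch of the change inside HaarDownsampling.__init__:

# build the filters in a local tensor ...
w = torch.ones(4, 1, 2, 2)
w[1, 0, 0, 1] = -1
w[1, 0, 1, 1] = -1
w[2, 0, 1, 0] = -1
w[2, 0, 1, 1] = -1
w[3, 0, 1, 0] = -1
w[3, 0, 0, 1] = -1
w = torch.cat([w] * self.in_channels, 0)
# ... and register it as a buffer instead of a frozen parameter
self.register_buffer("haar_weights", w)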

Thanks for your reply!