How to initialize feed forward network in Multitask CNN?

I’m trying to implement a CNN.

class CNN(nn.Module):

    def __init__(self,
                 n_tasks: int,
                 n_features: int,
                 dims: int,
                 layer_filters: List[int] = [100],
                 kernel_size: Optional[Union[int, List[int]]] = 5,
                 strides: Optional[Union[int, List[int]]] = 1,
                 dropouts: Optional[Union[int, List[int]]] = 0.5,
                 activation_fns: Optional[Union[nn.Module, List[nn.Module]]] = nn.ReLU,
                 pool_type: Optional[str] = 'max',
                 mode: Optional[str] = 'classification',
                 n_classes: Optional[int] = 2,
                 uncertainty: Optional[bool] = False,
                 residual: Optional[bool] = False,
                 padding: Optional[Union[str, Union[int, Tuple[int]]]] = 'valid') -> None:

        super(CNN, self).__init__()

        if dims not in (1, 2, 3):
            raise ValueError('Number of dimensions must be 1, 2 or 3')

        if mode not in ['classification', 'regression']:
            raise ValueError("mode must be either 'classification' or 'regression'")

        self.n_tasks = n_tasks
        self.n_features = n_features
        self.dims = dims
        self.mode = mode
        self.n_classes = n_classes
        self.uncertainty = uncertainty
        self.mode = mode
        self.layer_filters = layer_filters
        self.residual = residual

        n_layers = len(layer_filters)

        # PyTorch layers require input and output channels as parameter
        # if only one layer to make the model creating loop below work, multiply layer_filters wutg 2
        if len(layer_filters) == 1:
            layer_filters = layer_filters * 2

        if not isinstance(kernel_size, list):
            kernel_size = [kernel_size] * n_layers
        if not isinstance(strides, SequenceCollection):
            strides = [strides] * n_layers
        if not isinstance(dropouts, SequenceCollection):
            dropouts = [dropouts] * n_layers
        if not isinstance(activation_fns, SequenceCollection):
            activation_fns = [activation_fns] * n_layers

        if uncertainty:

            if mode != 'regression':
                raise ValueError("Uncertainty is only supported in regression mode")

            if any(d == 0.0 for d in dropouts):
                raise ValueError('Dropout must be included in every layer to predict uncertainty')

        # Python tuples use 0 based indexing, dims defines number of dimension for convolutional operation
        ConvLayer = (nn.Conv1d, nn.Conv2d, nn.Conv3d)[self.dims - 1]

        if pool_type == 'average':
            PoolLayer = (nn.AvgPool1d, nn.AvgPool2d, nn.AvgPool3d)[self.dims - 1]
        elif pool_type == 'max':
            PoolLayer = (nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d)[self.dims - 1]
        else:
            raise ValueError("pool_type must be either 'average' or 'max'")

       
        self.layers = nn.ModuleList()

        in_shape = n_features

        for out_shape, size, stride, dropout, activation_fn in zip(
                layer_filters, kernel_size, strides, dropouts,
                activation_fns):

            self.layers.append(
                ConvLayer(in_channels=in_shape,
                          out_channels=out_shape,
                          kernel_size=size,
                          stride=stride,
                          padding=padding,
                          dilation=1,
                          groups=1,
                          bias=True))

            if dropout > 0.0:
                self.layers.append(nn.Dropout(dropout))

            if activation_fn is not None:
                self.layers.append(activation_fn())

            self.layers.append(PoolLayer(size))
            
            
            in_shape = out_shape
        
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:

        prev_layer = x

        for layer in self.layers:
            x = layer(x)
            # residual blocks can only be used when successive layers have the same output shape
            if self.residual and layer.in_channels == layer.out_channels:
                x = x + prev_layer
            prev_layer = x

        outputs, output_types = x, None
        batch_size = x.shape[0]

        x = torch.reshape(x, (batch_size, -1))

        if self.mode == "classification":

            logits = nn.Linear(x.shape[1], self.n_tasks * self.n_classes)(x)
            logits = logits.view(batch_size, self.n_tasks, self.n_classes)
            output = F.softmax(logits, dim=1)

            outputs = [output, logits]
            assert logits.shape == (batch_size, self.n_tasks, self.n_classes)

        else:

            output = nn.Linear(x.shape[1], self.n_tasks)(x)
            output = output.view(batch_size, self.n_tasks)
            
            if self.uncertainty:
                log_var = (nn.Linear(x.shape[1], self.n_tasks)(x))
                log_var = log_var.view(batch_size, self.n_tasks, 1)
                var = torch.exp(log_var)
                outputs = [output, var, output, log_var]

            else:
                outputs = [output]
            
        return outputs

My use case requires it to be a regressor/classifier based on the arguments.
And the corresponding feedforward networks, I’m defining in forward() method.
The model works, but the issue is, the score is not good.

Reason :

Initialising ffn in forward blocks itself. See logits = nn.Linear(x.shape[1], self.n_tasks * self.n_classes)(x)

What happens here is that we are initialising the linear layer again and again with random weights. And thus optimize’s optimization doesn’t reflect, hence the poor score.

I want to initialise these ffn in constructor itself but the issue here is we don’t know the shape of input tensor, after x = torch.reshape(x, (batch_size, -1))
and torch requires input layer size.

Can anyone help me out ?

I was playing around with einops.torch.layers.Rearrange to append a nn.Module to reshape in constructor, but post that still the same problem, how to know input shape ?

If you cannot compute the number of input features for some layers, you could use the nn.Lazy* layers which would use the actual input tensor to determine the feature dimension.

1 Like

Thank youu ! That solves the issue .