I've been implementing a CNN for regression and classification.

from typing import List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange

try:
    from collections.abc import Sequence as SequenceCollection
except ImportError:
    from collections import Sequence as SequenceCollection

class TorchCNN(nn.Module):
    """A 1, 2, or 3 dimensional convolutional network for either regression or classification.

    The network consists of the following sequence of layers:
    - A configurable number of convolutional layers
    - A global pooling layer (either max pool or average pool)
    - A final dense layer to compute the output

    It optionally can add residual (skip) connections, in the spirit of the
    residual blocks described in He et al., "Identity Mappings in Deep Residual
    Networks" (https://arxiv.org/abs/1603.05027), rather than using a simple stack
    of convolution layers.  This often leads to easier training, especially when
    using a large number of layers.  Note that a skip connection can only be used
    when a convolution's output has the same shape as its input (e.g. same number
    of filters, stride 1 and padding='same').  Wherever the output shape changes,
    a simple convolution layer will be used even if residual=True.

    Inputs are channels-first tensors of shape ``(batch, n_features, *spatial)``
    with ``dims`` spatial axes.

    Examples
    --------
    >>> model = TorchCNN(n_tasks=5, n_features=8, dims=2, layer_filters=[3, 8, 8, 16],
    ...                  kernel_size=3, n_classes=5, mode='classification', uncertainty=False)
    >>> x = torch.ones(2, 8, 224, 224)
    >>> probs, logits = model(x)
    >>> probs.shape
    torch.Size([2, 5, 5])
    """

    # Convolution classes indexed by ``dims - 1``; also used in ``forward`` to
    # recognise the layers around which a skip connection may be added.
    _CONV_TYPES = (nn.Conv1d, nn.Conv2d, nn.Conv3d)

    def __init__(self,
                 n_tasks: int,
                 n_features: int,
                 dims: int,
                 layer_filters: List[int] = [100],
                 kernel_size: Optional[Union[int, List[int]]] = 5,
                 strides: Optional[Union[int, List[int]]] = 1,
                 dropouts: Optional[Union[float, List[float]]] = 0.5,
                 activation_fns: Optional[Union[nn.Module, List[nn.Module]]] = nn.ReLU,
                 pool_type: Optional[str] = 'max',
                 mode: Optional[str] = 'classification',
                 n_classes: Optional[int] = 2,
                 uncertainty: Optional[bool] = False,
                 residual: Optional[bool] = False,
                 padding: Optional[Union[str, Union[int, Tuple[int]]]] = 'valid') -> None:
        """Create a CNN.

        Parameters
        ----------
        n_tasks: int
          number of tasks
        n_features: int
          number of features (input channels)
        dims: int
          the number of dimensions to apply convolutions over (1, 2, or 3)
        layer_filters: list
          the number of output filters for each convolutional layer in the network.
          The length of this list determines the number of layers.
        kernel_size: int, tuple, or list
          a list giving the shape of the convolutional kernel for each layer.  Each
          element may be either an int (use the same kernel width for every dimension)
          or a tuple (the kernel width along each dimension).  Alternatively this may
          be a single int or tuple instead of a list, in which case the same kernel
          shape is used for every layer.
        strides: int, tuple, or list
          a list giving the stride between applications of the kernel for each layer.
          Each element may be either an int (use the same stride for every dimension)
          or a tuple (the stride along each dimension).  Alternatively this may be a
          single int or tuple instead of a list, in which case the same stride is
          used for every layer.
        dropouts: list or float
          the dropout probability to use for each layer.  The length of this list should equal len(layer_filters).
          Alternatively this may be a single value instead of a list, in which case the same value is used for every layer.
        activation_fns: list or object
          the activation function to apply to each layer, given either as an nn.Module
          class (instantiated with no arguments) or as an nn.Module instance; None means
          no activation.  The length of this list should equal len(layer_filters).
          Alternatively this may be a single value instead of a list, in which case the
          same value is used for every layer.
        pool_type: str
          the type of global pooling layer to use, either 'max' or 'average'
        mode: str
          Either 'classification' or 'regression'
        n_classes: int
          the number of classes to predict (only used in classification mode)
        uncertainty: bool
          if True, include extra outputs to enable the uncertainty in outputs to be
          predicted (regression mode only; every layer must use dropout)
        residual: bool
          if True, a skip connection is added around every convolution whose output
          has the same shape as its input.
        padding: str, int or tuple
          the padding to use for convolutional layers, e.g. 'valid' or 'same'; the
          value is passed unchanged to every convolution layer.

        Raises
        ------
        ValueError
          if dims, mode or pool_type is invalid, if a per-layer list does not have
          len(layer_filters) entries, or if uncertainty is requested outside
          regression mode or with a layer that has no dropout.
        """
        super(TorchCNN, self).__init__()

        if dims not in (1, 2, 3):
            raise ValueError('Number of dimensions must be 1, 2 or 3')

        if mode not in ['classification', 'regression']:
            raise ValueError("mode must be either 'classification' or 'regression'")

        self.n_tasks = n_tasks
        self.n_features = n_features
        self.dims = dims
        self.mode = mode
        self.n_classes = n_classes
        self.uncertainty = uncertainty
        self.layer_filters = layer_filters
        self.residual = residual

        n_layers = len(layer_filters)

        # kernel_size and strides may be a tuple describing ONE layer (one entry per
        # spatial dimension), so only a list is interpreted as "one entry per layer".
        kernel_size = self._per_layer(kernel_size, n_layers, 'kernel_size', list)
        strides = self._per_layer(strides, n_layers, 'strides', list)
        dropouts = self._per_layer(dropouts, n_layers, 'dropouts', SequenceCollection)
        activation_fns = self._per_layer(activation_fns, n_layers, 'activation_fns',
                                         SequenceCollection)

        if uncertainty:

            if mode != 'regression':
                raise ValueError("Uncertainty is only supported in regression mode")

            if any(d == 0.0 for d in dropouts):
                raise ValueError('Dropout must be included in every layer to predict uncertainty')

        # Python tuples use 0 based indexing, dims defines number of dimension for convolutional operation
        ConvLayer = self._CONV_TYPES[self.dims - 1]

        # NOTE(review): the posted snippet selected nn.MaxPool*/nn.AvgPool* but never
        # applied them; the documented *global* pooling is implemented here with
        # adaptive pooling down to a single position per channel - confirm.
        if pool_type == 'average':
            PoolLayer = (nn.AdaptiveAvgPool1d, nn.AdaptiveAvgPool2d,
                         nn.AdaptiveAvgPool3d)[self.dims - 1]
        elif pool_type == 'max':
            PoolLayer = (nn.AdaptiveMaxPool1d, nn.AdaptiveMaxPool2d,
                         nn.AdaptiveMaxPool3d)[self.dims - 1]
        else:
            raise ValueError("pool_type must be either 'average' or 'max'")

        self.layers = nn.ModuleList()

        in_shape = n_features

        # NOTE(review): the body of this loop was missing from the posted snippet;
        # it is reconstructed as convolution -> dropout -> activation - confirm.
        for out_shape, size, stride, dropout, activation_fn in zip(
                layer_filters, kernel_size, strides, dropouts, activation_fns):

            self.layers.append(
                ConvLayer(in_channels=in_shape,
                          out_channels=out_shape,
                          kernel_size=size,
                          stride=stride,
                          padding=padding))

            if dropout > 0.0:
                self.layers.append(nn.Dropout(dropout))

            if activation_fn is not None:
                # Accept both a ready-made module and a module class/factory.
                if isinstance(activation_fn, nn.Module):
                    self.layers.append(activation_fn)
                else:
                    self.layers.append(activation_fn())

            in_shape = out_shape

        self.pool = PoolLayer(1)

        # The dense heads are created once here so that their weights are registered
        # as parameters of the model (and therefore trained and saved).  After global
        # pooling each sample has `in_shape` features (one per output channel).
        if mode == 'classification':
            self.output_layer = nn.Linear(in_shape, n_tasks * n_classes)
        else:
            self.output_layer = nn.Linear(in_shape, n_tasks)
            if uncertainty:
                self.uncertainty_layer = nn.Linear(in_shape, n_tasks)

    @staticmethod
    def _per_layer(value, n_layers, name, sequence_types):
        """Return `value` as a list with one entry per layer.

        A value that is not an instance of `sequence_types` is repeated for every
        layer; a sequence must already contain exactly `n_layers` entries.
        """
        if not isinstance(value, sequence_types):
            return [value] * n_layers
        if len(value) != n_layers:
            raise ValueError(
                '%s has %d entries but layer_filters defines %d layers' %
                (name, len(value), n_layers))
        return list(value)

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Run the network on a batch of inputs.

        Parameters
        ----------
        x: torch.Tensor
          Input tensor of shape (batch_size, n_features, *spatial) with `dims`
          spatial axes.

        Returns
        -------
        list of torch.Tensor
          - classification: [probabilities, logits], both of shape
            (batch_size, n_tasks, n_classes); probabilities sum to 1 over classes.
          - regression: [output] with output of shape (batch_size, n_tasks).
          - regression with uncertainty: [output, var, output, log_var], where
            log_var has shape (batch_size, n_tasks, 1) and var = exp(log_var).
        """
        for layer in self.layers:
            out = layer(x)
            # A skip connection is only valid around a convolution whose output has
            # exactly the same shape as its input; otherwise the plain layer is used.
            if (self.residual and isinstance(layer, self._CONV_TYPES) and
                    out.shape == x.shape):
                out = out + x
            x = out

        batch_size = x.shape[0]

        # Global pooling leaves one value per channel: (batch, channels, 1, ...) -> (batch, channels)
        x = torch.flatten(self.pool(x), start_dim=1)

        if self.mode == "classification":
            logits = self.output_layer(x)
            logits = logits.view(batch_size, self.n_tasks, self.n_classes)
            # Normalise over the class axis so each task gets a distribution over classes.
            output = F.softmax(logits, dim=-1)
            return [output, logits]

        output = self.output_layer(x)
        output = output.view(batch_size, self.n_tasks)

        if self.uncertainty:
            log_var = self.uncertainty_layer(x)
            log_var = log_var.view(batch_size, self.n_tasks, 1)
            var = torch.exp(log_var)
            return [output, var, output, log_var]

        return [output]

My use case only allows me to pass NumPy arrays as inputs to the model. An example is as follows:

def test_cnn2d_torch():
  """Build a 2D regression TorchCNN and call it on a NumPy batch.

  The input is deliberately left as a NumPy array: this is the call that
  produces the TypeError discussed in this thread.
  """
  n_tasks = 5
  n_features = 8
  n_classes = 7
  batch_size = 2
  mode = 'regression'
  # NOTE(review): part of this call was missing from the posted snippet; the
  # arguments other than n_tasks, dropouts and layer_filters are reconstructed
  # from the locals above and the class's docstring example - confirm.
  model = TorchCNN(n_tasks=n_tasks,
                   n_features=n_features,
                   dims=2,
                   dropouts=[0.5, 0.2, 0.4, 0.7],
                   layer_filters=[3, 8, 8, 16],
                   kernel_size=3,
                   n_classes=n_classes,
                   mode=mode)

  # (batch, channels, height, width) as a NumPy array, not a torch.Tensor
  x = np.ones((batch_size, 8, 196, 196))
  y = model(x)

But when I do so, I get the following error:

TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, str, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !str!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, str, !tuple!, int)

Hi Anshuman!

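Then your use case won't work with PyTorch as written. PyTorch models take
PyTorch tensors as inputs.

Consider converting your NumPy array to a PyTorch tensor before passing
it to your model, for example `y = model(torch.from_numpy(x).float())`.
The `.float()` matters because `np.ones` produces float64 data, while the
model's weights are float32 by default.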


K. Frank