I’m trying to implement a CNN.
class CNN(nn.Module):
def __init__(self,
n_tasks: int,
n_features: int,
dims: int,
layer_filters: List[int] = [100],
kernel_size: Optional[Union[int, List[int]]] = 5,
strides: Optional[Union[int, List[int]]] = 1,
dropouts: Optional[Union[int, List[int]]] = 0.5,
activation_fns: Optional[Union[nn.Module, List[nn.Module]]] = nn.ReLU,
pool_type: Optional[str] = 'max',
mode: Optional[str] = 'classification',
n_classes: Optional[int] = 2,
uncertainty: Optional[bool] = False,
residual: Optional[bool] = False,
padding: Optional[Union[str, Union[int, Tuple[int]]]] = 'valid') -> None:
super(CNN, self).__init__()
if dims not in (1, 2, 3):
raise ValueError('Number of dimensions must be 1, 2 or 3')
if mode not in ['classification', 'regression']:
raise ValueError("mode must be either 'classification' or 'regression'")
self.n_tasks = n_tasks
self.n_features = n_features
self.dims = dims
self.mode = mode
self.n_classes = n_classes
self.uncertainty = uncertainty
self.mode = mode
self.layer_filters = layer_filters
self.residual = residual
n_layers = len(layer_filters)
# PyTorch layers require input and output channels as parameter
# if only one layer to make the model creating loop below work, multiply layer_filters wutg 2
if len(layer_filters) == 1:
layer_filters = layer_filters * 2
if not isinstance(kernel_size, list):
kernel_size = [kernel_size] * n_layers
if not isinstance(strides, SequenceCollection):
strides = [strides] * n_layers
if not isinstance(dropouts, SequenceCollection):
dropouts = [dropouts] * n_layers
if not isinstance(activation_fns, SequenceCollection):
activation_fns = [activation_fns] * n_layers
if uncertainty:
if mode != 'regression':
raise ValueError("Uncertainty is only supported in regression mode")
if any(d == 0.0 for d in dropouts):
raise ValueError('Dropout must be included in every layer to predict uncertainty')
# Python tuples use 0 based indexing, dims defines number of dimension for convolutional operation
ConvLayer = (nn.Conv1d, nn.Conv2d, nn.Conv3d)[self.dims - 1]
if pool_type == 'average':
PoolLayer = (nn.AvgPool1d, nn.AvgPool2d, nn.AvgPool3d)[self.dims - 1]
elif pool_type == 'max':
PoolLayer = (nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d)[self.dims - 1]
else:
raise ValueError("pool_type must be either 'average' or 'max'")
self.layers = nn.ModuleList()
in_shape = n_features
for out_shape, size, stride, dropout, activation_fn in zip(
layer_filters, kernel_size, strides, dropouts,
activation_fns):
self.layers.append(
ConvLayer(in_channels=in_shape,
out_channels=out_shape,
kernel_size=size,
stride=stride,
padding=padding,
dilation=1,
groups=1,
bias=True))
if dropout > 0.0:
self.layers.append(nn.Dropout(dropout))
if activation_fn is not None:
self.layers.append(activation_fn())
self.layers.append(PoolLayer(size))
in_shape = out_shape
def forward(self, x: torch.Tensor) -> torch.Tensor:
prev_layer = x
for layer in self.layers:
x = layer(x)
# residual blocks can only be used when successive layers have the same output shape
if self.residual and layer.in_channels == layer.out_channels:
x = x + prev_layer
prev_layer = x
outputs, output_types = x, None
batch_size = x.shape[0]
x = torch.reshape(x, (batch_size, -1))
if self.mode == "classification":
logits = nn.Linear(x.shape[1], self.n_tasks * self.n_classes)(x)
logits = logits.view(batch_size, self.n_tasks, self.n_classes)
output = F.softmax(logits, dim=1)
outputs = [output, logits]
assert logits.shape == (batch_size, self.n_tasks, self.n_classes)
else:
output = nn.Linear(x.shape[1], self.n_tasks)(x)
output = output.view(batch_size, self.n_tasks)
if self.uncertainty:
log_var = (nn.Linear(x.shape[1], self.n_tasks)(x))
log_var = log_var.view(batch_size, self.n_tasks, 1)
var = torch.exp(log_var)
outputs = [output, var, output, log_var]
else:
outputs = [output]
return outputs
My use case requires it to be a regressor/classifier based on the arguments.
And the corresponding feedforward networks, I’m defining in forward()
method.
The model works, but the issue is, the score is not good.
Reason :
Initialising ffn in forward blocks itself. See logits = nn.Linear(x.shape[1], self.n_tasks * self.n_classes)(x)
What happens here is that we are initialising the linear layer again and again with random weights. And thus optimize’s optimization doesn’t reflect, hence the poor score.
I want to initialise these ffn in constructor itself but the issue here is we don’t know the shape of input tensor, after x = torch.reshape(x, (batch_size, -1))
and torch requires input layer size.
Can anyone help me out ?