Here’s my simple NN structure:
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List

class DNN(nn.Module):
    def __init__(self,
                 input_layer_size: int,
                 hidden_layer_sizes: List[int],
                 dropout_rate: float,
                 debug: bool = False):
        '''
        Set up the network.
        Args:
            input_layer_size: size of the input layer
            hidden_layer_sizes: sizes of the hidden linear layers
                e.g. [5,5,3,2] -> linear layers 5,5 -> 5,3 -> 3,2
            dropout_rate: dropout rate
        '''
        super().__init__()
        self.debug = debug
        self.linear_layers_list: List[nn.Module] = []
        # input layer
        self.linear_layers_list.append(nn.Linear(input_layer_size, hidden_layer_sizes[0]))
        self.linear_layers_list.append(nn.LeakyReLU(0.04))
        self.linear_layers_list.append(nn.BatchNorm1d(hidden_layer_sizes[0]))
        self.linear_layers_list.append(nn.Dropout(p=dropout_rate))
        # hidden layers
        for in_size, out_size in zip(hidden_layer_sizes[:-1], hidden_layer_sizes[1:]):
            self.linear_layers_list.append(nn.Linear(in_size, out_size))
            self.linear_layers_list.append(nn.LeakyReLU(0.04))
            self.linear_layers_list.append(nn.BatchNorm1d(out_size))
            self.linear_layers_list.append(nn.Dropout(p=dropout_rate))
        # output layer
        output_layer_mean = nn.Linear(hidden_layer_sizes[-1], 1)
        output_layer_sigma = nn.Linear(hidden_layer_sizes[-1], 1)
        # make sure that sigma > 0
        output_layer = torch.cat(output_layer_mean, F.softplus(output_layer_sigma))  # <--- NO GOOD
        self.linear_layers_list.append(output_layer)
        self.linear_layers = torch.nn.ModuleList(self.linear_layers_list)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.linear_layers:
            x = layer(x)
        return x
I need the output of output_layer_sigma to be positive, so I’m running it through a softplus. However, I get the following error at run time:
File "(...)", line 56, in __init__
output_layer = torch.cat(output_layer_mean, F.softplus(output_layer_sigma))
TypeError: softplus(): argument 'input' (position 1) must be Tensor, not Linear
How do I make sure the second value in my output tensor is always positive? Should I call F.softplus in forward instead of __init__()? Or use the nn.Softplus module? Or is there a better, “PyTorch-standard” way of doing things? Thanks!
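For reference, here is roughly what I have in mind for the "softplus in forward" version. MeanSigmaHead is just a made-up name for the output part, and I'm not sure this is the idiomatic way to do it:

import torch
import torch.nn as nn
import torch.nn.functional as F

class MeanSigmaHead(nn.Module):
    '''Sketch of an output block: two linear heads, softplus applied in forward.'''
    def __init__(self, hidden_size: int):
        super().__init__()
        # keep the heads as submodules so their weights get registered
        self.mean_head = nn.Linear(hidden_size, 1)
        self.sigma_head = nn.Linear(hidden_size, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        mean = self.mean_head(x)
        # softplus on the tensor output (not the Linear module), so sigma > 0
        sigma = F.softplus(self.sigma_head(x))
        return torch.cat([mean, sigma], dim=1)

# usage idea: out = MeanSigmaHead(hidden_layer_sizes[-1])(last_hidden_activations)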
PS: Here’s what this would look like in tf.keras:
# output layer - enforce a positive sigma
mean_output = keras.layers.Dense(1)(last_hidden_layer)
sigma_output = keras.layers.Dense(1, activation='softplus')(last_hidden_layer)
output_layer = keras.layers.concatenate([mean_output, sigma_output])