Initialize a DNN with the weight matrices of an autoencoder

I’m trying to implement this paper by following this MATLAB code, but I’m new to PyTorch, so it’s a little over my head.
The idea is to use the optimized weights of the autoencoder to initialize the DNN.
Given the autoencoder and DNN below, my questions are:
1- How do I make the autoencoder return its parameters (weights + biases)?
2- How do I initialize the layers of the DNN with those parameters?

Stacked AE:

import torch
import torch.nn as nn


class AutoEncoder(nn.Module):
    r"""A single autoencoder layer that optimizes itself inside forward()
    (greedy layer-wise training)."""

    def __init__(self, input_size, output_size):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_size, output_size),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(output_size, input_size),
            nn.ReLU(),
        )

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.parameters(), lr=0.1)

    def forward(self, x):
        # Detach so gradients do not flow back into earlier layers;
        # each autoencoder trains only on its own reconstruction loss.
        x = x.detach()
        y = self.encoder(x)

        if self.training:
            x_reconstruct = self.decoder(y)
            # x is already detached, so it can serve directly as the
            # reconstruction target (Variable is deprecated).
            loss = self.criterion(x_reconstruct, x)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        return y.detach()

    def reconstruct(self, x):
        return self.decoder(x)


class StackedAutoEncoder(nn.Module):
    r"""
    A stacked autoencoder made from the autoencoders above.
    Each autoencoder optimizes itself inside its own forward pass, so all
    layers are trained independently during the same call.
    """

    def __init__(self):
        super().__init__()

        self.ae1 = AutoEncoder(54, 20)
        self.ae2 = AutoEncoder(20, 8)
        self.ae3 = AutoEncoder(8, 3)
        self.ae4 = AutoEncoder(3, 1)

    def forward(self, x):
        a1 = self.ae1(x)
        a2 = self.ae2(a1)
        a3 = self.ae3(a2)
        a4 = self.ae4(a3)
        return a1, a2, a3, a4
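For context, since each autoencoder optimizes itself inside forward(), training the stack is just a matter of calling the model in train mode on batches. A rough, runnable sketch with dummy data (the batch and epoch count are placeholders):

import torch

sae = StackedAutoEncoder()
sae.train()
x = torch.randn(32, 54)            # dummy batch: 32 samples, 54 features
for epoch in range(10):
    a1, a2, a3, a4 = sae(x)        # each AE takes its own SGD step inside forward()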

and the DNN:

class DNN(nn.Module):

    n_classes = 1

    def __init__(self, d_in, n_layers, units, dropout,
                 activation=nn.GELU, final_act=nn.Sigmoid):
        super().__init__()

        assert len(units) == n_layers

        self.d_in = d_in
        self.n_layers = n_layers
        self.units = units
        self.activation = activation
        self.final_act = final_act
        self.dropout = None if dropout is None else nn.Dropout(p=dropout)

        # Build the hidden stack; dropout (if any) follows each activation.
        layers = [nn.Linear(d_in, units[0]), self.activation()]
        if self.dropout is not None:
            layers.append(self.dropout)
        for i in range(1, n_layers):
            layers.extend([nn.Linear(units[i - 1], units[i]),
                           self.activation()])
            if self.dropout is not None:
                layers.append(self.dropout)

        # Output head.
        layers.extend([nn.Linear(units[-1], self.n_classes), self.final_act()])

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

Hi @lima, thanks for posting.

If you want the autoencoder to return its parameters, you can simply call model.parameters(), model.named_parameters(), or model.state_dict() (the last one includes buffers as well). That will give you all the parameters of the encoder. In your DNN you can then manually copy the parameters over, or use model.load_state_dict() if the model architecture matches your saved state_dict. For details you can refer to this note.
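To make that concrete, here is a minimal sketch of manually copying each trained encoder’s Linear weights into the matching DNN layer. This is just one way to do it, not the paper’s exact procedure; it assumes the DNN is built so its Linear shapes line up with the four encoders, e.g. DNN(d_in=54, n_layers=3, units=[20, 8, 3], dropout=None), which gives Linear layers of shape 54→20, 20→8, 8→3, and 3→1:

import torch
import torch.nn as nn

sae = StackedAutoEncoder()
# ... train sae here ...

dnn = DNN(d_in=54, n_layers=3, units=[20, 8, 3], dropout=None)

# The encoder Linear modules, in order (each encoder is Sequential(Linear, ReLU)).
encoder_linears = [ae.encoder[0] for ae in (sae.ae1, sae.ae2, sae.ae3, sae.ae4)]
# The DNN Linear modules, in order.
dnn_linears = [m for m in dnn.layers if isinstance(m, nn.Linear)]

with torch.no_grad():
    for src, dst in zip(encoder_linears, dnn_linears):
        dst.weight.copy_(src.weight)
        dst.bias.copy_(src.bias)

Alternatively, you could assemble a state_dict keyed by the DNN’s parameter names and call dnn.load_state_dict() on it, but the explicit copy above makes the layer correspondence easy to see.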

Thank you @wanchaol for your input, but what if I want to get the output of the first layer (or of any particular layer) and use it as the input of another model?

Couldn’t you just store the output of the first layer somewhere after it executes, save it if you want, and then use it as the input of another model? What’s the blocker here?
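For instance, a forward hook is one way to capture an intermediate activation without modifying the model. A minimal sketch (variable names are placeholders):

import torch

sae = StackedAutoEncoder()
captured = {}

def save_output(module, inputs, output):
    # Runs every time sae.ae1 finishes its forward pass.
    captured["ae1"] = output.detach()

handle = sae.ae1.register_forward_hook(save_output)

x = torch.randn(16, 54)   # dummy batch with 54 features
sae.eval()                # avoid the in-forward training updates
_ = sae(x)

first_layer_output = captured["ae1"]   # feed this to another model
handle.remove()

In this particular StackedAutoEncoder you don’t even need a hook, since forward() already returns a1 through a4; you can just unpack the tuple and keep the activation you want.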