Sharing parameters in two different instances

Hi,

I’ve got the model you can see below, but I need to create two instances of it that share x2h and h2h.
Does anyone know how to do it?

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        self.x2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)

        #self.softmax = nn.LogSoftmax(dim=1)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden):
        hidden1 = self.x2h(input)
        hidden2 = self.h2h(hidden)
        hidden = hidden1 + hidden2
        output = self.h2o(hidden)
        output = self.softmax(output)

        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

Simple: you just pass the shared layers as arguments when you construct each instance.

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, x2h, h2h):
        super(RNN, self).__init__()

        # Stored by reference: every instance that receives the same modules shares their parameters
        self.x2h = x2h
        self.h2h = h2h
        self.hidden_size = hidden_size
        self.h2o = nn.Linear(hidden_size, output_size)

        #self.softmax = nn.LogSoftmax(dim=1)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden):

        hidden1 = self.x2h(input)
        hidden2 = self.h2h(hidden)
        hidden = hidden1 + hidden2
        output = self.h2o(hidden)
        output = self.softmax(output)

        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

input_size = 1024
hidden_size = 512
output_size = 64

# The shared layers are created once and passed to both instances
x2h = nn.Linear(input_size, hidden_size)
h2h = nn.Linear(hidden_size, hidden_size)

rnn1 = RNN(input_size, hidden_size, output_size, x2h, h2h)
rnn2 = RNN(input_size, hidden_size, output_size, x2h, h2h)

print(rnn1.x2h.weight)
print(rnn2.x2h.weight)

Thank you very much!
I have one doubt about this solution. When rnn1 does a forward pass and its parameters are updated, will they also be updated for rnn2, every time? (This is what I want.)
My doubt comes from the fact that we pass the layers only at initialization and nowhere else…

Yes, when you do a forward pass and an optimization step for any one model instance, it will automatically update the parameters in the shared layers.
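A minimal sketch to check this, assuming the two instances built above; the optimizer, learning rate, and the throwaway loss are placeholders for illustration, not from the thread. After one optimizer step on rnn1, the shared x2h weight changes, and rnn2 sees the change because both instances hold the very same nn.Linear modules.

import torch
import torch.optim as optim

# rnn1 and rnn2 are the two instances created above; they share x2h and h2h
optimizer = optim.SGD(rnn1.parameters(), lr=0.1)   # placeholder optimizer/lr

before = rnn2.x2h.weight.detach().clone()

x = torch.randn(1, input_size)
hidden = rnn1.initHidden()
output, hidden = rnn1(x, hidden)

loss = output[0, 0]        # throwaway loss, just to produce non-zero gradients
optimizer.zero_grad()
loss.backward()
optimizer.step()

print(rnn1.x2h.weight is rnn2.x2h.weight)      # True: the very same Parameter object
print(torch.equal(before, rnn2.x2h.weight))    # False in general: rnn2 sees the update made through rnn1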

Perfect! Thank you very much @yash1994

import itertools

import torch
import torch.nn as nn
import torch.optim as optim

# model class
class Framework(nn.Module):
    def __init__(self, input_shape, representation_size, output_shape, fc1, fc21, fc22, fc3, fc4):
        super(Framework, self).__init__()
        self.input_shape = input_shape
        # Shared layers, passed in and stored by reference
        self.fc1 = fc1
        self.fc21 = fc21
        self.fc22 = fc22
        self.fc3 = fc3
        self.fc4 = fc4
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.elu = nn.ELU()

    def encoder(self, x):
        """Encode a batch of samples, and return posterior parameters for each point."""
        h1 = self.relu(self.fc1(x))
        return self.fc21(h1), self.elu(self.fc22(h1)) + 1

    def decoder(self, z):
        h3 = self.relu(self.fc3(z))
        return self.sigmoid(self.fc4(h3))

    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values.
        This is stochastic during training, and returns the mode during evaluation."""
        if self.training:
            std = logvar.mul(0.5).exp_()
            eps = torch.randn_like(std)
            return eps.mul(std).add_(mu)
        else:
            return mu

    def get_z(self, x):
        """Encode a batch of data points, x, into their z representations."""
        mu, logvar = self.encoder(x.view(-1, self.input_shape))
        return self.reparam(mu, logvar)

    def forward(self, x):
        """Takes a batch of samples, encodes them, and then decodes them again to compare."""
        mu, logvar = self.encoder(x.view(-1, self.input_shape))
        z = self.reparam(mu, logvar)
        return self.decoder(z), mu, logvar, z

#parameter sharing
fc1 = nn.Linear(input_shape, 512,bias=True)
torch.nn.init.xavier_uniform_(fc1.weight)
fc21 = nn.Linear(512, representation_size,bias=True)
torch.nn.init.xavier_uniform_(fc21.weight)
fc22 = nn.Linear(512, representation_size,bias=True)
torch.nn.init.xavier_uniform_(fc22.weight)
fc3 = nn.Linear(representation_size, 512,bias=True)
torch.nn.init.xavier_uniform_(fc3.weight)
fc4 = nn.Linear(512, output_shape,bias=True)
torch.nn.init.xavier_uniform_(fc4.weight)

#Complete model
model1 = Framework(input_shape, representation_size, output_shape, fc1, fc21,fc22,fc3,fc4)
model2 = Framework(input_shape, representation_size, output_shape, fc1, fc21,fc22,fc3,fc4)
model3 = Framework(input_shape, representation_size, output_shape, fc1, fc21,fc22,fc3,fc4)

model1.to(device)
model2.to(device)
model3.to(device)
#define optimizers
f_params=model1.parameters()
s_params=model2.parameters()
t_params=model3.parameters()

dvne_params=itertools.chain(f_params,s_params,t_params)

optimizer = optim.RMSprop(f_params, lr=learning_rate)

Is this the correct way to share the parameters between the three model instances?
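One thing worth noting, as an aside: in this setup every layer that actually owns parameters (fc1, fc21, fc22, fc3 and fc4) is shared, so model1.parameters() already covers every trainable parameter and a step through it updates all three models. Chaining the three parameters() generators, on the other hand, yields each shared Parameter three times. Below is a minimal, illustrative sketch of one way to hand a single optimizer each parameter exactly once; the unique_params helper is not from the post, and RMSprop / learning_rate are simply reused from the snippet above.

import torch.optim as optim

# Yield each Parameter object exactly once, even when several models share layers
def unique_params(*models):
    seen = set()
    for model in models:
        for p in model.parameters():
            if id(p) not in seen:
                seen.add(id(p))
                yield p

optimizer = optim.RMSprop(unique_params(model1, model2, model3), lr=learning_rate)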