I want to create a model with a network-wide learnable parameter that must be shared with every layer. I have thought of two ways of doing this:
(1)
class Func(nn.Module):
    """Network with shared learnable parameters ``lamb_mu`` / ``lamb_rho``.

    Approach (1): the shared parameters are registered once on the top-level
    module and passed explicitly to each layer at every ``forward`` call.

    Args:
        data_dim: flattened input feature size (forward reshapes to 28*28,
            so this is presumably 784 — TODO confirm).
        hidden_dim1: width of the first hidden layer.
        hidden_dim2: width of the second hidden layer.
        target_dim: output size.
        rho_prior: prior passed through to each ``Lasso_layer``.
            BUG FIX: the original code read ``self.rho_prior`` without ever
            assigning it, which raises ``AttributeError`` in ``__init__``;
            it is now an explicit (optional) constructor argument.
    """

    def __init__(self, data_dim, hidden_dim1, hidden_dim2, target_dim,
                 rho_prior=None):
        super().__init__()
        # Shared, network-wide learnable scalars. Note uniform_(a, a) just
        # fills with the constant a; kept for parity with the original code.
        self.lamb_mu = nn.Parameter(torch.Tensor(1).uniform_(1, 1))
        self.lamb_rho = nn.Parameter(torch.Tensor(1).uniform_(-6., -6.))
        self.rho_prior = rho_prior
        self.l1 = Lasso_layer(data_dim, hidden_dim1, self.rho_prior)
        self.l2 = Lasso_layer(hidden_dim1, hidden_dim2, self.rho_prior)
        self.l4 = Lasso_layer(hidden_dim2, target_dim, self.rho_prior)
        self.lamb_sigma = None

    def forward(self, X):
        # softplus(x) == log1p(exp(x)) but is numerically stable for large x.
        self.lamb_sigma = F.softplus(self.lamb_rho)
        output = F.relu(self.l1(X.reshape(-1, 28*28), self.lamb_mu, self.lamb_sigma))
        output = F.relu(self.l2(output, self.lamb_mu, self.lamb_sigma))
        output = self.l4(output, self.lamb_mu, self.lamb_sigma)
        return output
(2)
class Func(nn.Module):
    """Network with shared learnable parameters ``lamb_mu`` / ``lamb_rho``.

    Approach (2): the shared ``nn.Parameter`` objects are handed to each
    ``Lasso_layer`` at construction time, so ``forward`` calls the layers
    without extra arguments.

    Args:
        data_dim: flattened input feature size (forward reshapes to 28*28,
            so this is presumably 784 — TODO confirm).
        hidden_dim1: width of the first hidden layer.
        hidden_dim2: width of the second hidden layer.
        target_dim: output size.
        rho_prior: prior passed through to each ``Lasso_layer``.
            BUG FIX: the original code read ``self.rho_prior`` without ever
            assigning it, which raises ``AttributeError`` in ``__init__``;
            it is now an explicit (optional) constructor argument.
    """

    def __init__(self, data_dim, hidden_dim1, hidden_dim2, target_dim,
                 rho_prior=None):
        super().__init__()
        # Shared, network-wide learnable scalars. Note uniform_(a, a) just
        # fills with the constant a; kept for parity with the original code.
        self.lamb_mu = nn.Parameter(torch.Tensor(1).uniform_(1, 1))
        self.lamb_rho = nn.Parameter(torch.Tensor(1).uniform_(-6., -6.))
        self.rho_prior = rho_prior
        # The same parameter objects are shared by every layer; autograd
        # accumulates gradients from all of them into one tensor each.
        self.l1 = Lasso_layer(data_dim, hidden_dim1, self.rho_prior,
                              self.lamb_mu, self.lamb_rho)
        self.l2 = Lasso_layer(hidden_dim1, hidden_dim2, self.rho_prior,
                              self.lamb_mu, self.lamb_rho)
        self.l4 = Lasso_layer(hidden_dim2, target_dim, self.rho_prior,
                              self.lamb_mu, self.lamb_rho)
        # NOTE: the original also set self.lamb_sigma = None here, but it is
        # never read in this variant, so the dead assignment was removed.

    def forward(self, X):
        output = F.relu(self.l1(X.reshape(-1, 28*28)))
        output = F.relu(self.l2(output))
        output = self.l4(output)
        return output
Which of these approaches is correct, so that gradients flow into lamb_mu and lamb_rho and they are updated as single shared parameters for the entire network?