I am trying to create a model that allows the user to specify the number of hidden layers to be integrated into the network.
Specifically, this is my model:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, h_sizes, out_size):
        super(MLP, self).__init__()
        # Hidden layers
        self.hidden = []
        for k in range(len(h_sizes)-1):
            self.hidden.append(nn.Linear(h_sizes[k], h_sizes[k+1]))
        # Output layer
        self.out = nn.Linear(h_sizes[-1], out_size)

    def forward(self, x):
        # Feedforward
        for layer in self.hidden:
            x = F.relu(layer(x))
        output = F.softmax(self.out(x), dim=1)
        return output
I would like to be able to give it the parameter “h_sizes”, which would be a list of integers defining the size of each hidden layer (with the length of the list defining the number of hidden layers to build).
However, it doesn’t quite work: the parameters of those layers don’t seem to be included in model.parameters(), which makes it inconvenient to send the model to the GPU.
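For example (a quick check with made-up sizes), only the output layer’s parameters show up:

model = MLP([5, 10, 10], 2)
print(len(list(model.parameters())))  # 2 -> only self.out's weight and bias; the hidden layers are missing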
Since those layers are stored in a plain Python list rather than set directly as attributes of self, PyTorch does not register them; you need to register each one explicitly using self.add_module(namestr, layer). For example…
class MLP(nn.Module):
    def __init__(self, h_sizes, out_size):
        ...
        for k in range(len(h_sizes)-1):
            self.hidden.append(nn.Linear(h_sizes[k], h_sizes[k+1]))
            self.add_module("hidden_layer"+str(k), self.hidden[-1])
        ...
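With the layers registered this way, they show up when you iterate over the module’s children (sizes here are just an example):

model = MLP([5, 10, 10], 2)
for name, module in model.named_children():
    print(name, module)
# hidden_layer0 Linear(in_features=5, out_features=10, bias=True)
# hidden_layer1 Linear(in_features=10, out_features=10, bias=True)
# out Linear(in_features=10, out_features=2, bias=True)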
Using nn.ModuleList is a much neater solution, since every module you append to it is registered automatically.
class MLP(nn.Module):
    def __init__(self, h_sizes, out_size):
        ...
        self.hidden = nn.ModuleList()
        for k in range(len(h_sizes)-1):
            self.hidden.append(nn.Linear(h_sizes[k], h_sizes[k+1]))
        ...
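After that change, model.parameters() includes the hidden layers as well, so the optimizer sees them and moving the model to the GPU works as expected (sizes again just for illustration):

import torch

model = MLP([5, 10, 10], 2)
print(len(list(model.parameters())))  # 6 -> weight and bias for the two hidden layers plus the output layer
if torch.cuda.is_available():
    model = model.cuda()  # now also moves the hidden layers' parameters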
Hi, no you don’t need to. For those who are interested, here is a cleaner implementation (no len(), range(), or index access [], though that’s just my opinion) which is also more complete, with an optimizer, device handling, a forward function, and a usage example:
import torch
from torch import nn, optim
from torch.nn.modules import Module


class MLP(nn.Module):
    def __init__(self, input_size, layers_data: list, learning_rate=0.01, optimizer=optim.Adam):
        super().__init__()
        self.layers = nn.ModuleList()
        self.input_size = input_size  # Can be useful later ...
        for size, activation in layers_data:
            self.layers.append(nn.Linear(input_size, size))
            input_size = size  # For the next layer
            if activation is not None:
                assert isinstance(activation, Module), \
                    "Each tuple should contain a size (int) and a torch.nn.modules.Module."
                self.layers.append(activation)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)
        self.learning_rate = learning_rate
        self.optimizer = optimizer(params=self.parameters(), lr=learning_rate)

    def forward(self, input_data):
        for layer in self.layers:
            input_data = layer(input_data)
        return input_data


if __name__ == "__main__":
    """
    Tests
    """
    data_size = 5
    layer1, layer2 = 10, 10
    output_size = 2
    mlp = MLP(data_size, [(layer1, nn.ReLU()), (layer2, nn.ReLU()), (output_size, nn.Sigmoid())])
    data = torch.randn(data_size, device=mlp.device)  # create the input on the same device as the model
    output = mlp(data)
    print("done")
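If it helps, here is a minimal sketch (my own addition, with a made-up MSE loss and a dummy target) of how the optimizer stored on the model could drive one training step, continuing the example above:

target = torch.rand(output_size, device=mlp.device)  # dummy target, purely for illustration
loss = nn.functional.mse_loss(output, target)
mlp.optimizer.zero_grad()
loss.backward()
mlp.optimizer.step()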