Hi all,
I am new to PyTorch and need some help.
I would like to add two options to the definition of a very simple fully-connected NN class (FCN) that uses only nn.Linear layers:
- an option to select an activation function (e.g. Tanh, ReLU, …) and
- an option to select an initialization type (Xavier, Kaiming, zeros, …).
Here is my first code snippet, which unfortunately does not work:
import torch
import torch.nn as nn

class FCN(nn.Module):  # inherits from nn.Module
    "Defines a standard fully-connected network in PyTorch"

    # Initialization of the neural network layers in __init__:
    def __init__(self, input_dim, hidden_dim, output_dim, activation="Tanh", initialization="Xavier_normal"):
        super().__init__()
        # HERE THE DEFINITION OF THE ACTIVATION FUNCTIONS:
        activation_functions = nn.ModuleDict([
            ["Tanh", nn.Tanh()],
            ["ReLU", nn.ReLU()],
            ["LeakyReLU", nn.LeakyReLU()],
            ["Sigmoid", nn.Sigmoid()],
            ["Softmax", nn.Softmax()],
        ])
        # HERE THE DEFINITION OF THE INITIALIZATION OPTION:
        initialization_type = nn.ModuleDict([
            ["Xavier_normal", nn.init.xavier_normal_],
        ])
        self.initialization = initialization
        initialization_type.get(initialization, None)
        # INPUT Linear function:
        self.fc_in = nn.Linear(input_dim, hidden_dim)
        # INPUT Non-linearity (activation function):
        self.tanh = nn.Tanh()
        # OUTPUT Linear function:
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        # INPUT Linear function:
        hidden = self.fc_in(input)
        # HIDDEN Non-linearity:
        hidden = self.tanh(hidden)
        # OUTPUT Linear function:
        output = self.fc_out(hidden)
        return output

    def weights_and_bias_initialization(self):
        for module_with_layers in self.modules():
            if isinstance(module_with_layers, nn.Linear):
                self.initialization(module_with_layers.weight)
                # nn.init.xavier_uniform_(module_with_layers.weight)
                # nn.init.xavier_uniform_(module_with_layers.bias)
                # nn.init.constant_(module_with_layers.bias, 0)
I am using an nn.ModuleDict, following these instructions, for the definition of the possible activation functions (Tanh, …), and this part works!
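For clarity, by "this part works" I mean that looking up the activation module by its key behaves as expected; a minimal sketch of that lookup (just a sanity check on my side):

import torch
import torch.nn as nn

# Sanity check: pick an activation module out of an nn.ModuleDict by key
acts = nn.ModuleDict([["Tanh", nn.Tanh()], ["ReLU", nn.ReLU()]])
act = acts["Tanh"]               # returns the stored nn.Tanh() module
print(act(torch.tensor([0.5])))  # tanh(0.5) ≈ 0.4621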
The error appears in the definition of the initialization when I create an instance with:
model_test = FCN(2, 8, 1, activation="Tanh", initialization="Xavier_normal")
because I again use an nn.ModuleDict for the definition of the initialization:
initialization_type = nn.ModuleDict([
    ["Xavier_normal", nn.init.xavier_normal_],
])
ERROR:
TypeError: torch.nn.init.xavier_normal_ is not a Module subclass
If I instead store an instance, i.e. nn.init.xavier_normal_() with parentheses, I obtain a new error:
TypeError: xavier_normal_() missing 1 required positional argument: 'tensor'
I am not quite sure whether nn.ModuleDict is the best option for this case.
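Would a plain Python dict work better here, since the init functions are functions and not nn.Module subclasses? Something like this sketch is what I have in mind (just a guess on my side, names chosen by me):

import torch.nn as nn

# Sketch: map option names to the in-place init functions with a plain dict,
# because nn.init.xavier_normal_ etc. are functions, not nn.Module subclasses.
initialization_type = {
    "Xavier_normal": nn.init.xavier_normal_,
    "Kaiming_normal": nn.init.kaiming_normal_,
    "zeros": nn.init.zeros_,
}
init_fn = initialization_type.get("Xavier_normal", None)
layer = nn.Linear(2, 8)
if init_fn is not None:
    init_fn(layer.weight)  # applies Xavier normal init to the 2-D weight tensor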
A second, related question:
Is inside the class definition the best place for the initialization?
I remember that @ptrblck explained somewhere (unfortunately I cannot find his explanation and code snippet again) that it is better to define the initialization function outside the class and apply it after creating the instance.
If I use the following code snippet, keeping the activation option (which works) inside the class and applying the initialization from outside:
def weights_and_bias_initialization(model_instance):
    for module_with_layers in model_instance.modules():
        if isinstance(module_with_layers, nn.Linear):
            nn.init.xavier_uniform_(module_with_layers.weight)
            nn.init.xavier_uniform_(module_with_layers.bias)

class FCN_2(nn.Module):  # inherits from nn.Module
    "Defines a standard fully-connected network in PyTorch"

    # Initialization of the neural network layers in __init__:
    def __init__(self, input_dim, hidden_dim, output_dim, activation="Tanh"):
        super().__init__()
        # HERE THE DEFINITION OF THE ACTIVATION FUNCTIONS:
        activation_functions = nn.ModuleDict([
            ["Tanh", nn.Tanh()],
            ["ReLU", nn.ReLU()],
            ["LeakyReLU", nn.LeakyReLU()],
            ["Sigmoid", nn.Sigmoid()],
            ["Softmax", nn.Softmax()],
        ])
        # INPUT Linear function:
        self.fc_in = nn.Linear(input_dim, hidden_dim)
        # INPUT Non-linearity (activation function):
        self.tanh = nn.Tanh()
        # OUTPUT Linear function:
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        # INPUT Linear function:
        hidden = self.fc_in(input)
        # HIDDEN Non-linearity:
        hidden = self.tanh(hidden)
        # OUTPUT Linear function:
        output = self.fc_out(hidden)
        return output

model = FCN_2(2, 8, 1, activation="Tanh")
with torch.no_grad():
    weights_and_bias_initialization(model)
I get an error related to nn.init.xavier_uniform_(module_with_layers.bias):
ValueError: Fan in and fan out can not be computed for tensor with fewer than 2 dimensions.
Is the mistake related to the definition of the output?
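Or is it simply that the bias is a 1-D tensor, so fan-in and fan-out cannot be computed for it? My guess would be to keep Xavier for the 2-D weight and set the bias to zeros, roughly like this (not sure whether this is the recommended way):

import torch.nn as nn

def weights_and_bias_initialization(model_instance):
    for module_with_layers in model_instance.modules():
        if isinstance(module_with_layers, nn.Linear):
            nn.init.xavier_uniform_(module_with_layers.weight)  # 2-D weight: fan in/out well defined
            nn.init.zeros_(module_with_layers.bias)              # 1-D bias: just set it to zero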
Thanks a lot