How to replace a layer in a Sequential module

I want to replace activation functions and other layers in my model. I used the idea from this question: https://discuss.pytorch.org/t/how-to-replace-a-layer-with-own-custom-variant/43586

However, the code replaces all activation functions except those inside the Sequential module. Any idea what I am doing wrong? How can I also replace the ReLU inside the Sequential module?

import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP(nn.Module):

    def __init__(self, num_in, num_hidden, num_out, seed=None):
        super().__init__()
        dropout_rate = 0.2
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.dropout2 = nn.Dropout(p=dropout_rate)

        self.linear1 = nn.Linear(num_in, num_hidden)
        self.linear2 = nn.Linear(num_hidden, num_hidden)
        self.linear3 = nn.Linear(num_hidden, num_out)

        self.sequential_module = nn.Sequential(
            nn.Linear(num_hidden, num_hidden),
            nn.ReLU(),
            nn.Dropout(p=0.2),
        )

        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

    def forward(self, x):
        x = self.relu1(self.linear1(x))
        x = self.dropout1(x)

        x = self.relu2(self.linear2(x))
        x = self.dropout2(x)

        x = self.sequential_module(x)

        x = F.softmax(self.linear3(x), dim=1)

        return x


def replace_layer(module):
    # Swap out any ReLU that is stored as a direct attribute of this module.
    for attr_str in dir(module):
        target_attr = getattr(module, attr_str)
        if type(target_attr) == torch.nn.ReLU:
            new_activation = torch.nn.Sigmoid()
            setattr(module, attr_str, new_activation)

    # Recurse into the child modules.
    for name, immediate_child_module in module.named_children():
        replace_layer(immediate_child_module)


if __name__ == "__main__":

    model = MLP(num_in=2, num_hidden=16, num_out=3)
    print(model)

    replace_layer(model)
    print(model)

The code above produces the following output:

MLP(
  (dropout1): Dropout(p=0.2, inplace=False)
  (dropout2): Dropout(p=0.2, inplace=False)
  (linear1): Linear(in_features=2, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=16, bias=True)
  (linear3): Linear(in_features=16, out_features=3, bias=True)
  (sequential_module): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
  )
  (relu1): ReLU()
  (relu2): ReLU()
)
MLP(
  (dropout1): Dropout(p=0.2, inplace=False)
  (dropout2): Dropout(p=0.2, inplace=False)
  (linear1): Linear(in_features=2, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=16, bias=True)
  (linear3): Linear(in_features=16, out_features=3, bias=True)
  (sequential_module): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
  )
  (relu1): Sigmoid()
  (relu2): Sigmoid()
)

Hi Samuel!

Your code works for me (with an old 0.3.0 version of pytorch).

I copy-pasted your code and ran it:

>>> mm = MLP (2, 16, 3)
>>> mm
MLP(
  (dropout1): Dropout(p=0.2)
  (dropout2): Dropout(p=0.2)
  (linear1): Linear(in_features=2, out_features=16)
  (linear2): Linear(in_features=16, out_features=16)
  (linear3): Linear(in_features=16, out_features=3)
  (sequential_module): Sequential(
    (0): Linear(in_features=16, out_features=16)
    (1): ReLU()
    (2): Dropout(p=0.2)
  )
  (relu1): ReLU()
  (relu2): ReLU()
)
>>> replace_layer (mm)
>>> mm
MLP(
  (dropout1): Dropout(p=0.2)
  (dropout2): Dropout(p=0.2)
  (linear1): Linear(in_features=2, out_features=16)
  (linear2): Linear(in_features=16, out_features=16)
  (linear3): Linear(in_features=16, out_features=3)
  (sequential_module): Sequential(
    (0): Linear(in_features=16, out_features=16)
    (1): Sigmoid()
    (2): Dropout(p=0.2)
  )
  (relu1): Sigmoid()
  (relu2): Sigmoid()
)
>>> torch.__version__
'0.3.0b0+591e73e'

Best.

K. Frank

Hi KFrank,

That is interesting. My version is 1.9.0+cpu. Any idea why the results are different, or what I can do to get the same result you got?

Hi Samuel!

Apparently there has been a change in how Sequentials (and
presumably other Modules) are stored sometime between my
prehistoric 0.3.0 version and the modern era.

Here is a script that illustrates the difference:

import torch
print (torch.__version__)

seq = torch.nn.Sequential (torch.nn.Linear (2, 3), torch.nn.ReLU())
print ('seq = ...')
print (seq)
print ('dir (seq) = ...')
print (dir (seq))

Here is its output for version 0.3.0:

0.3.0b0+591e73e
seq = ...
Sequential(
  (0): Linear(in_features=2, out_features=3)
  (1): ReLU()
)
dir (seq) = ...
['0', '1', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_all_buffers', '_apply', '_backend', '_backward_hooks', '_buffers', '_forward_hooks', '_forward_pre_hooks', '_modules', '_parameters', 'add_module', 'apply', 'children', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'float', 'forward', 'half', 'load_state_dict', 'modules', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_parameter', 'share_memory', 'state_dict', 'train', 'training', 'type', 'zero_grad']

Note that it contains dir() entries for its submodules, namely ‘0’ and ‘1’.

Here is the script output for a reasonably up-to-date 1.7.1 version:

1.7.1
seq = ...
Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
)
dir (seq) = ...
['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_buffers', '_call_impl', '_forward_hooks', '_forward_pre_hooks', '_get_item_by_idx', '_get_name', '_load_from_state_dict', '_load_state_dict_pre_hooks', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_save_to_state_dict', '_slow_forward', '_state_dict_hooks', '_version', 'add_module', 'apply', 'bfloat16', 'buffers', 'children', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'extra_repr', 'float', 'forward', 'half', 'load_state_dict', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_parameter', 'requires_grad_', 'share_memory', 'state_dict', 'to', 'train', 'training', 'type', 'zero_grad']

Note that the explicit submodule dir() entries no longer appear.
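
The submodules themselves are still registered, of course; they just are not exposed through dir() anymore. A quick check (a minimal sketch of my own, separate from the script above) shows that the official api still sees them:

import torch

seq = torch.nn.Sequential(torch.nn.Linear(2, 3), torch.nn.ReLU())

# named_children() still yields the numeric child names ...
print([name for name, child in seq.named_children()])  # ['0', '1']

# ... even though dir() no longer lists them on recent versions
# (it did on 0.3.0, per the output above).
print('0' in dir(seq))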

To get the same result as me, run with a prehistoric pytorch version.

More usefully:

The idea of iterating over the attributes returned by dir() strikes me
as asking for trouble.

I would imagine that working with the official pytorch api, rather than
rummaging around in dir(), would be a more robust solution. I would
suggest iterating over .modules() or .children() or something.
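
For instance, something along these lines (just a sketch, not tested against your exact model; the helper name is mine) uses only named_children() and setattr(), and it reaches inside the Sequential as well, because setattr() on an nn.Module stores Module values in its _modules dictionary:

import torch


def replace_relu_with_sigmoid(module):
    # Illustrative helper: replace ReLU children of this module,
    # then recurse into everything else.
    for name, child in module.named_children():
        if isinstance(child, torch.nn.ReLU):
            setattr(module, name, torch.nn.Sigmoid())
        else:
            replace_relu_with_sigmoid(child)


# Assuming your MLP class from above is in scope:
model = MLP(num_in=2, num_hidden=16, num_out=3)
replace_relu_with_sigmoid(model)
print(model)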

Best.

K. Frank


Would it make sense to iterate through the model using

for name, layer in module.named_modules():
    pass

in combination with getattr() and setattr(), or what would be the PyTorch way of replacing layers in arbitrary networks?
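
Something like this sketch (untested; the helper name replace_layers is just for illustration) is what I have in mind, splitting the dotted names from named_modules() to find the parent module before calling setattr():

def replace_layers(model, old_type, make_new):
    # Illustrative helper: collect the dotted names first, then replace,
    # so the module tree is not mutated while iterating over it.
    names = [name for name, layer in model.named_modules()
             if isinstance(layer, old_type)]
    for name in names:
        parent = model
        *parent_path, child_name = name.split('.')
        for part in parent_path:
            parent = getattr(parent, part)
        setattr(parent, child_name, make_new())


replace_layers(model, torch.nn.ReLU, torch.nn.Sigmoid)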