You could save the state_dict and load it for resetting the model. Have a look at the Serialization Semantics to see how to do it.
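Something along these lines should work; a minimal sketch, where the model and the filename are just placeholders:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)  # placeholder model

# save the initial state_dict once, right after construction
torch.save(model.state_dict(), 'init_state.pt')

# ... training / weight updates happen here ...

# later: restore the saved weights to reset the model
model.load_state_dict(torch.load('init_state.pt'))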
Would this work for you or do you want to re-initialize it to random weights?
Could I double check whether the snippet below is a robust solution? It is working, but it seems useful to corroborate. I am looking for a way to clear all weights between iterations of a hyper-parameter search, where I run the individual models as sub-processes.
import os

chk_dir = '/root/.cache/torch/hub/checkpoints/'
if os.path.isdir(chk_dir):
    # delete every cached checkpoint so the next run starts from fresh weights
    for chkpnt in os.scandir(chk_dir):
        print(f"rm'ing {chkpnt.path}")
        os.system(f'rm {chkpnt.path}')
@ptrblck @Brando_Miranda I was trying to reset the weights and then assign a new tensor as the weights of a particular layer. I just had one doubt regarding the above discussion: does reset_parameters() also clear all the memory that the layer occupies?
.reset_parameters() will reset the parameters inplace, such that the actual parameters are the same objects and only their values are re-initialized (so no memory is freed or newly allocated).
This would allow you to use the same optimizer etc. in case you’ve already passed the parameters to it.
If you are creating a new module, you would of course also reset the parameters, but these parameters are new objects which you might need to pass to an optimizer again (depending on your actual use case).
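A small sketch to illustrate this; the layer is just an example:

import torch
import torch.nn as nn

lin = nn.Linear(4, 2)
optimizer = torch.optim.SGD(lin.parameters(), lr=0.1)

weight_ref = lin.weight      # keep a reference to the parameter object
lin.reset_parameters()       # re-initializes weight and bias inplace

print(weight_ref is lin.weight)  # True: same object, new values
# the optimizer therefore still points to the valid parameters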
def reset_model_weights(layer):
    if hasattr(layer, 'reset_parameters'):
        # the layer defines its own re-initialization, so call it directly
        layer.reset_parameters()
    else:
        # otherwise recurse into the child modules, if any
        if hasattr(layer, 'children'):
            for child in layer.children():
                reset_model_weights(child)
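For example, calling it on the top-level module recurses through all submodules (the model below is only a placeholder):

import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
reset_model_weights(model)  # nn.Sequential has no reset_parameters, so it recurses into the children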
Is it possible to apply Xavier (or a similar initialization) to only a subset of weights in a layer? For example, I want to randomly re-initialize 20% of the weights while keeping the rest unchanged. Is there a standard way to do this in PyTorch, or do you have any suggestions?
You can apply a random boolean mask to any tensor in PyTorch, so that the update only takes effect at the positions where the mask is True.
import torch
import torch.nn as nn
import torch.nn.init as init

def partial_xavier_init(model: nn.Module, percent: float):
    """
    Applies Xavier uniform initialization to a random subset (percent) of weights in each layer.

    Args:
    - model: The PyTorch model.
    - percent: Fraction of weights to reinitialize (0.0 to 1.0).
    """
    for name, param in model.named_parameters():
        # Apply only to weight parameters; Xavier needs at least 2D tensors
        if 'weight' in name and param.dim() >= 2:
            # Create a tensor with Xavier initialization
            new_param = torch.empty_like(param)
            init.xavier_uniform_(new_param)
            # Create a random mask selecting roughly `percent` of the entries
            mask = torch.rand_like(param) < percent
            # Apply the masked update without tracking gradients
            with torch.no_grad():
                param[mask] = new_param[mask]
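Usage would then look something like this, reusing the imports from the snippet above (the model is just a placeholder):

model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 4))
partial_xavier_init(model, percent=0.2)  # re-initializes roughly 20% of each weight matrix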