How to manipulate individual weights and can't optimize a non-leaf tensor

localh · February 24, 2021, 5:37pm

I have a task that requires me to tweak individual weights but I am struggling to come up with ways to access the weights neatly. The process that I came up with below, for instance, will not work.

Basically, I would like to:

Initialize model
Save a copy of all the initial weights.
Run a mini-batch, send loss backward, and run optimizer.step().
Use step 2 to overwrite randomly selected n backprop updated weights, which requires some sort of indexing.
Perform step 3 and 4 repeatedly until epoch finishes.

Edit: I have updated the workflow a bit, but now I am stuck dealing with the same error.

import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# initialize model
class mlp1(torch.nn.Module):
    def __init__(self, num_features, num_hidden, num_classes):
        super(mlp1, self).__init__()
        self.num_classes = num_classes
        self.input_layer = torch.nn.Linear(num_features, num_hidden)
        self.out_layer = torch.nn.Linear(num_hidden, num_classes)

    def forward(self, x):
        x = self.input_layer(x)
        x = torch.sigmoid(x)
        logits = self.out_layer(x)
        probas = torch.softmax(logits, dim=1)
        return logits, probas

# instantiate model
model = mlp1(num_features=28*28, num_hidden=100, num_classes=10).to(device)

# get params
param_optimizer = list(model.named_parameters())
# look for just thse params
params = ['weight', 'bias']
# group them
optimizer_grouped_parameters = [
        {'params': [param for name, param in param_optimizer if any(weight in name for weight in params)] },
        ]

# store indices to freeze
frozen_indices = []
# store frozen weight values
frozen_weights = []
# specify percent to freeze
freeze_percent = 0.7
for i in range(len(optimizer_grouped_parameters[0]['params'])):
    # gen indices
    f_indices = torch.tensor(np.sort(
                                     np.random.choice(np.arange(len(optimizer_grouped_parameters[0]['params'][i].view(-1))),
                                                       size=int(freeze_percent*len(optimizer_grouped_parameters[0]['params'][i].view(-1))),
                                                        replace=False))).long()
    # append indices
    frozen_indices.append(f_indices)

    # retrieve weights to freeze; use a mask for fast access
    f_weights = optimizer_grouped_parameters[0]['params'][i].view(-1)[f_indices]
    # save them to a container
    frozen_weights.append(f_weights)

# replace weights after one mini-batch
for i in range(len(optimizer_grouped_parameters[0]['params'])):
    optimizer_grouped_parameters[0]['params'][i].view(-1)[frozen_indices[i]] = frozen_weights[i]

# optim
optimizer = torch.optim.SGD(optimizer_grouped_parameters, lr=0.1)
# ValueError: can't optimize a non-leaf Tensor