I have a task that requires me to tweak individual weights but I am struggling to come up with ways to access the weights neatly. The process that I came up with below, for instance, will not work.
Basically, I would like to:
- Initialize model
- Save a copy of all the initial weights.
- Run a mini-batch, send loss backward, and run
optimizer.step()
. - Use step 2 to overwrite randomly selected
n
backprop updated weights, which requires some sort of indexing. - Perform step 3 and 4 repeatedly until epoch finishes.
Edit: I have updated the workflow a bit, but now I am stuck dealing with the same error.
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# initialize model
class mlp1(torch.nn.Module):
def __init__(self, num_features, num_hidden, num_classes):
super(mlp1, self).__init__()
self.num_classes = num_classes
self.input_layer = torch.nn.Linear(num_features, num_hidden)
self.out_layer = torch.nn.Linear(num_hidden, num_classes)
def forward(self, x):
x = self.input_layer(x)
x = torch.sigmoid(x)
logits = self.out_layer(x)
probas = torch.softmax(logits, dim=1)
return logits, probas
# instantiate model
model = mlp1(num_features=28*28, num_hidden=100, num_classes=10).to(device)
# get params
param_optimizer = list(model.named_parameters())
# look for just thse params
params = ['weight', 'bias']
# group them
optimizer_grouped_parameters = [
{'params': [param for name, param in param_optimizer if any(weight in name for weight in params)] },
]
# store indices to freeze
frozen_indices = []
# store frozen weight values
frozen_weights = []
# specify percent to freeze
freeze_percent = 0.7
for i in range(len(optimizer_grouped_parameters[0]['params'])):
# gen indices
f_indices = torch.tensor(np.sort(
np.random.choice(np.arange(len(optimizer_grouped_parameters[0]['params'][i].view(-1))),
size=int(freeze_percent*len(optimizer_grouped_parameters[0]['params'][i].view(-1))),
replace=False))).long()
# append indices
frozen_indices.append(f_indices)
# retrieve weights to freeze; use a mask for fast access
f_weights = optimizer_grouped_parameters[0]['params'][i].view(-1)[f_indices]
# save them to a container
frozen_weights.append(f_weights)
# replace weights after one mini-batch
for i in range(len(optimizer_grouped_parameters[0]['params'])):
optimizer_grouped_parameters[0]['params'][i].view(-1)[frozen_indices[i]] = frozen_weights[i]
# optim
optimizer = torch.optim.SGD(optimizer_grouped_parameters, lr=0.1)
# ValueError: can't optimize a non-leaf Tensor