I have been searching for the proper way to have a list of named parameters tracked by the computation graph of a model that uses a custom autograd function. The best I managed is the following somewhat hacky workaround:
import torch
import torch.nn as nn


class CustomFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, gamma, input):
        ctx.save_for_backward(gamma, input)
        output = torch.exp(gamma) * (torch.roll(input, +1) - input ** 2) ** 2 + (1 - input) ** 2
        return torch.sum(output, 0)

    @staticmethod
    def backward(ctx, grad_output):
        print("Call to backwards")
        gamma, input = ctx.saved_tensors
        grad_gamma = grad_input = None
        if ctx.needs_input_grad[0]:
            grad_gamma = grad_output * torch.exp(gamma) * ((torch.roll(input, +1) - input ** 2) ** 2)
        if ctx.needs_input_grad[1]:
            grad_input = grad_output * (-4 * input * torch.exp(gamma) * (torch.roll(input, +1) - input ** 2) - 2 * (input - 1))
        return grad_gamma, grad_input


class TestModule(torch.nn.Module):
    def __init__(self, num_weights):
        super(TestModule, self).__init__()
        # Create a linear layer for each input feature
        self.gamma_layers = nn.ModuleList(
            [nn.Linear(in_features=1, out_features=1, bias=False) for _ in range(num_weights)]
        )
        # Initialize each linear layer with a random weight
        for layer in self.gamma_layers:
            layer.weight.data = torch.randn(1)

    def forward(self, input: torch.Tensor):
        print("Gamma before pass", input, input.shape)
        # Apply each linear layer to its corresponding input feature
        gamma = torch.cat(
            [self.gamma_layers[i](input[i].unsqueeze(0)).unsqueeze(0) for i in range(input.shape[0])],
            dim=0,
        )
        print("Gamma inside Net", gamma)
        print("Gamma inside Net", gamma.shape)
        # Assuming CustomFunction expects 2D inputs as well
        output = CustomFunction.apply(gamma, input)
        return output


# Initialize model with multiple weights (e.g., 3)
num_weights = 3
model = TestModule(num_weights)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)


def random_custom_loss(output, gamma):
    target = torch.tensor(0.5)  # Random target for illustration
    loss = torch.mean((output - target) ** 2) + torch.mean(torch.abs(gamma))
    return loss


# Number of epochs
num_epochs = 10
model.train()

for epoch in range(num_epochs):
    optimizer.zero_grad()  # Clear previous gradients

    # Use the current weights as input for the forward pass
    input_tensor = torch.cat([layer.weight.clone() for layer in model.gamma_layers])
    output = model(input_tensor)
    print(output)

    # Calculate custom loss
    loss = random_custom_loss(output, input_tensor)
    print(loss)

    # Backward pass
    loss.backward()

    # Update weights
    optimizer.step()

    # Print weights and gradients
    print(f"Epoch {epoch+1}")
    for i, layer in enumerate(model.gamma_layers):
        print(f"Gamma weights {i+1}: {layer.weight.data}")
        print(f"Gamma grads {i+1}: {layer.weight.grad}")
    print("-" * 30)
Okay, so this is an isolated version of my use case. I wanted to initialize a bunch of random values to act as parameters for my model. These values act as weights that need to be updated at training time. This is the part that does that:
# Create a linear layer for each input feature
self.gamma_layers = nn.ModuleList(
    [nn.Linear(in_features=1, out_features=1, bias=False) for _ in range(num_weights)]
)
# Initialize each linear layer with a random weight
for layer in self.gamma_layers:
    layer.weight.data = torch.randn(1)
The above implementation works for accessing the values I want to plug into the autograd function. It just uses a single-layer NN to get a 1-to-1 mapping of those values, except the values are actually tracked by PyTorch. If I instead initialize them via nn.Parameter and nn.ParameterList, the values do not end up as part of the computation graph.
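For reference, this is roughly what I mean by the nn.Parameter / nn.ParameterList approach, as a minimal sketch rather than my exact code (the class name TestModuleParams, the attribute name gammas, and the per-parameter shapes are just illustrative):

# Sketch of the nn.ParameterList variant I mean (illustrative, not my actual code)
class TestModuleParams(torch.nn.Module):
    def __init__(self, num_weights):
        super().__init__()
        # One scalar-ish parameter per weight, registered directly on the module
        self.gammas = nn.ParameterList(
            [nn.Parameter(torch.randn(1)) for _ in range(num_weights)]
        )

    def forward(self, input: torch.Tensor):
        # Stack the registered parameters into a single tensor before the custom function
        gamma = torch.cat([g for g in self.gammas], dim=0)
        return CustomFunction.apply(gamma, input)

That is the kind of thing I was expecting to be able to drop in directly, but in my setup the parameters registered this way never showed up in the graph the way the Linear-layer hack does.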
Don’t mind what the functions are actually doing/optimizing on. It’s just random filler logic.
I’m fairly sure there IS a proper way to do this, but I was kind of pressed for time and had to do this abomination of an implementation lol