Hello!
I’m trying to implement the following optimization problem, minimizing a loss function built as follows:
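Roughly, for agent i the surrogate looks like this (writing f_i for its cross-entropy loss, x_i for its parameters, and y^k for the common reference point; this is just my shorthand for what the code below computes):

\tilde{f}_i(x_i; y^k) = f_i(x_i) + \pi_i(y^k)^T (x_i - y^k), \qquad \pi_i(y^k) = \sum_{j \neq i} \nabla f_j(y^k)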
During the implementation I’ve built the surrogate of agent i (f tilde) plus the linearization term with respect to all the other agents j of the multi-agent system. When I try to solve this with loss.backward(), I get an error saying "one of the variables needed for gradient computation has been modified by an inplace operation". This is my code:
import torch
import torch.nn as nn
import torch.optim as optim
# Net is my model class (defined elsewhere)

class Agents:
    def __init__(self, train_loaders, val_loaders, test_loader, num_agents, pie=[], tau=10, epochs=5):
        self.train_loaders = train_loaders
        self.val_loaders = val_loaders
        self.test_loader = test_loader
        self.num_agents = num_agents
        self.epochs = epochs
        self.gamma_values = [0.5]
        self.models = [Net() for _ in range(num_agents)]
        self.optimizers = [optim.SGD(model.parameters(), lr=1/tau) for model in self.models]
        self.criterion = nn.CrossEntropyLoss()
        self.validation_accuracies = {i: 0 for i in range(num_agents)}  # Dictionary to store validation accuracies
        self.losses = {i: [] for i in range(num_agents)}  # Dictionary to store losses
    def gamma_update(self, epsilon=2):  # updating step-size rule
        gamma_next = self.gamma_values[-1] * (1 - (epsilon * self.gamma_values[-1]))  # gamma[0] = 1/eps
        self.gamma_values.append(gamma_next)
    def full_linearization(self, y_k, agent_idx):
        model_i = self.models[agent_idx]
        x_i = [param.clone() for param in model_i.parameters()]  # Store the current parameters (x_i)
        model_i.load_state_dict(y_k)
        model_i.train()

        # Initialize pi_i and difference with zeros (same shapes as model_i's state_dict)
        pi_i = {k: torch.zeros_like(v) for k, v in model_i.state_dict().items()}
        difference = {k: torch.zeros_like(v) for k, v in model_i.state_dict().items()}

        total_surrogate_loss = 0.0
        for inputs, labels in self.train_loaders[agent_idx]:
            optimizer = self.optimizers[agent_idx]
            optimizer.zero_grad()

            # Forward pass for agent i
            outputs_i = model_i(inputs)
            CE_surrogate = self.criterion(outputs_i, labels)
            total_surrogate_loss += CE_surrogate

            # Compute pi_i(x_i[n]) gradients
            for j in range(self.num_agents):
                if j != agent_idx:
                    model_j = self.models[j]
                    model_j.load_state_dict(y_k)  # Load y_k for agent j
                    for inputs_j, labels_j in self.train_loaders[j]:
                        outputs_j = model_j(inputs_j)
                        loss_j = self.criterion(outputs_j, labels_j)
                        grads = torch.autograd.grad(loss_j, model_j.parameters(), retain_graph=True)
                        # Accumulate gradients in pi_i (note the use of `clone` to avoid in-place operations)
                        for (name, param), grad in zip(model_j.named_parameters(), grads):
                            pi_i[name] = pi_i[name] + grad.clone()

            # Flatten pi_i
            pi_flatten = torch.cat([v.view(-1) for v in pi_i.values()])

            # Compute the difference vector x_i - y_k
            for (name, y_k_param), x_i_param in zip(y_k.items(), x_i):
                difference[name] = difference[name] + (x_i_param - y_k_param).clone()
            diff_flat = torch.cat([v.view(-1) for v in difference.values()])

            # Add pi_flatten^T * diff_flat to the surrogate loss
            total_surrogate_loss += torch.dot(pi_flatten, diff_flat)

            # Backward pass and optimization
            total_surrogate_loss.backward(retain_graph=True)  # Using retain_graph=True since we might need it for further computations
            optimizer.step()

        post_opt = model_i.state_dict()
        avg_loss = total_surrogate_loss.item() / len(self.train_loaders[agent_idx])
        self.losses[agent_idx].append(avg_loss)
        print(f"Surrogate agent {agent_idx}: {avg_loss}")
        return pre_opt, post_opt
Can anyone see the issue with this code? As far as I can tell there are no in-place operations that modify the tensors required for the optimization procedure. This is the error:
RuntimeError                              Traceback (most recent call last)
in <cell line: 4>()
      2 s = Agents(train_loaders, val_loaders, test_loader, num_agents)
      3 y_k = Net().state_dict()
----> 4 s.full_linearization(y_k, 0)

3 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
    766         unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
    767     try:
--> 768         return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    769             t_outputs, *args, **kwargs
    770         )  # Calls into the C++ engine to run the backward pass

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 10]], which is output 0 of AsStridedBackward0, is at version 4; expected version 3 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!