RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.LongTensor [64]] is at version 2; expected version 1 instead

Hi, I've run into a problem. I searched for similar posts and revised my code based on the usual suggestions, such as replacing +=, adding .clone(), or setting inplace=False, but it still doesn't work and I can't figure out what is causing it.

My code is as follows.
The initialization is simple:

self.criterion = nn.CrossEntropyLoss(reduction='none')
self.optimizer = optim.SGD(target_model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
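
For reference, reduction='none' makes the criterion return one loss per sample instead of a scalar, which is why I loop over the losses in get_data below. A quick shape sketch (batch size 64 and 100 classes are my assumptions here):

import torch
import torch.nn as nn

logits = torch.randn(64, 100)           # dummy target model outputs
labels = torch.randint(0, 100, (64,))   # dummy integer class labels
per_sample = nn.CrossEntropyLoss(reduction='none')(logits, labels)
print(per_sample.shape)                 # torch.Size([64]), one loss value per sample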

First, I want to get the outputs, labels, losses, and the last layer's gradient from a model called the target model:

def get_data(self, inputs, targets):
	outputs = self.target_model(inputs)
	losses = self.criterion(outputs, targets)

	gradient = []
	for loss in losses:
		loss.backward(retain_graph=True)

		gradient_list = reversed(list(self.target_model.named_parameters()))

		for name, parameter in gradient_list:
			if 'weight' in name:
				gradients = parameter.grad.clone()
				gradients = gradients.unsqueeze_(0)
				gradient.append(gradients.unsqueeze_(0))
				break

	gradient = torch.cat(gradient, dim=0)
	losses = losses.unsqueeze_(1)
	targets = targets.unsqueeze_(1).float()

	return outputs, gradient, losses, targets
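
For context, this is roughly how get_data is called and the shapes I expect back (a sketch; batch size 64, 100 classes, and a 100 x 4096 last-layer weight matrix are assumptions based on the rest of my code):

outputs, gradient, losses, targets = self.get_data(inputs, targets)
# outputs:  torch.Size([64, 100])           target model logits
# gradient: torch.Size([64, 1, 100, 4096])  last-layer weight gradient, stacked per sample
# losses:   torch.Size([64, 1])             per-sample cross-entropy loss
# targets:  torch.Size([64, 1])             labels cast to float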

Then I use these to train another classifier:


def train(self):
	self.attack_model.train()
	train_loss = 0
	correct = 0
	total = 0

	for inputs, targets, members in self.train_loader:
		inputs, targets, members = inputs.to(self.device), targets.to(self.device), members.to(self.device)
			
		outputs, gradient, losses, targets = self.get_data(inputs, targets)
			
		results = self.model(outputs, losses, gradient, targets)
		with torch.autograd.set_detect_anomaly(True):
			loss_2 = self.criterion(results, members).mean()
			self.optimizer.zero_grad()
			loss_2.backward()
			self.optimizer.step()
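
This is the loop where I wrapped the backward pass in set_detect_anomaly(True) while debugging. To check my understanding of this class of error, I wrote a small standalone sketch of the general pattern (modifying a tensor in place after it was used in a loss whose graph is later backpropagated); I believe it triggers the same kind of RuntimeError, though I'm not sure it is actually what happens in my code:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(reduction='none')
logits = torch.randn(64, 100, requires_grad=True)   # dummy model outputs
labels = torch.randint(0, 100, (64,))               # dummy integer labels
losses = criterion(logits, labels)                  # labels are used by the loss here
labels.unsqueeze_(1)                                # in-place op bumps the version counter
losses.mean().backward()                            # I expect the in-place modification RuntimeError here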

My model has five parts. The first four each produce a result from their own network, and then I concatenate those results together as the input to the fifth network to get the final result:

class model(nn.Module):
	def __init__(self):
		super(model, self).__init__()
		self.Output_Component = nn.Sequential(
			nn.Dropout(p=0.2),
			nn.Linear(100, 128),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(128, 64),
		)

		self.Label_Component = nn.Sequential(
			nn.Dropout(p=0.2),
			nn.Linear(1, 128),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(128, 64),
		)

		self.Loss_Component = nn.Sequential(
			nn.Dropout(p=0.2),
			nn.Linear(1, 128),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(128, 64),
		)

		self.Gradient_Component = nn.Sequential(
			nn.Conv2d(1, 3, 5),
			nn.ReLU(),
			nn.Flatten(),
			nn.Dropout(p=0.2),
			nn.Linear(3 * 96 * 4092, 128),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(128, 64),
		)

		self.Encoder_Component = nn.Sequential(
			nn.Dropout(p=0.2),
			nn.Linear(256, 256),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(256, 128),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(128, 64),
			nn.ReLU(),
			nn.Dropout(p=0.2),
			nn.Linear(64, 2),
		)


	def forward(self, a, b, c, d):
		Output_Component_result = self.Output_Component(a)
		Loss_Component_result = self.Loss_Component(b)
		Gradient_Component_result = self.Gradient_Component(c)
		Label_Component_result = self.Label_Component(d)
		
		final_inputs = torch.cat((Output_Component_result,Loss_Component_result,Gradient_Component_result, Label_Component_result), 1)	
		final_result = self.Encoder_Component(final_inputs)

		return final_result
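
For completeness, here is the kind of dummy-input shape check I have in mind for the model class above (a sketch; the batch size of 64 and the 100 x 4096 gradient shape are assumptions about my actual data):

import torch

attack = model()
a = torch.randn(64, 100)            # target model outputs (logits)
b = torch.randn(64, 1)              # per-sample losses
c = torch.randn(64, 1, 100, 4096)   # last-layer weight gradient, one channel per sample
d = torch.randn(64, 1)              # labels cast to float
print(attack(a, b, c, d).shape)     # expect torch.Size([64, 2])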