I am also encountering this issue while building a GAN. I updated my PyTorch installation today (my code was causing full server crashes, so I updated my CUDA installation, drivers, and PyTorch environment, and now I get this error instead), and I have pared my code down to the smallest example that still reproduces the error.
faulttest.py:
import torch
from torch import nn

config = {
    'batch_size': 8,
    'inp_x_size': 4,
    'latent_size': 2,
}

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.linear_stack = nn.Sequential(
            nn.Linear(config['inp_x_size'], config['latent_size']),
            nn.Linear(config['latent_size'], config['inp_x_size']),
        )

    def forward(self, x):
        return self.linear_stack(x)

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.linear_stack = nn.Sequential(
            nn.Linear(config['inp_x_size'], 1)
        )

    def forward(self, x):
        return self.linear_stack(x)

def train(
    generator: Generator, discriminator: Discriminator,
    g_loss_fn: nn.Module, d_loss_fn: nn.Module,
    g_optimizer: torch.optim.Optimizer, d_optimizer: torch.optim.Optimizer,
):
    for batch in range(1):
        # Load a batch
        x = torch.rand((config['batch_size'], config['inp_x_size']), dtype=torch.float32)
        g_forward = generator(x)
        g_loss = g_loss_fn(g_forward, x)
        # Backprop and optimization for generator
        g_optimizer.zero_grad()
        g_loss.backward(retain_graph=True)
        g_optimizer.step()
        # Calculate discriminator loss
        y = torch.rand((config['batch_size'], 1), dtype=torch.float32)
        d_score = discriminator(g_forward)            # FAIL
        #d_score = discriminator(g_forward.detach())  # WORKS
        d_loss = d_loss_fn(d_score, y)
        # Backprop and optimization for discriminator
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

def main():
    torch.autograd.set_detect_anomaly(True)
    # Construct networks
    generator = Generator()
    discriminator = Discriminator()
    # Define loss functions
    g_loss_fn = nn.MSELoss()
    d_loss_fn = nn.MSELoss()
    # Define optimizers
    g_optimizer = torch.optim.Adam(generator.parameters(), lr=1E-4)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=1E-4)
    # Perform training cycles
    for epoch in range(1):
        train(generator, discriminator, g_loss_fn, d_loss_fn, g_optimizer, d_optimizer)

if __name__ == "__main__":
    main()
Console log:
(pytorch) jon@io:/mnt/ssd-storage/GAN_Playground$ python3 faulttest.py
Torch: 2.1.2
/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/autograd/__init__.py:251: UserWarning: Error detected in AddmmBackward0. Traceback of forward call that caused the error:
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 76, in <module>
    main()
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 73, in main
    train(generator, discriminator, g_loss_fn, d_loss_fn, g_optimizer, d_optimizer)
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 37, in train
    g_forward = generator(x)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 18, in forward
    return self.linear_stack(x)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/container.py", line 215, in forward
    input = module(input)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
 (Triggered internally at /opt/conda/conda-bld/pytorch_1702400430266/work/torch/csrc/autograd/python_anomaly_mode.cpp:114.)
  Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 76, in <module>
    main()
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 73, in main
    train(generator, discriminator, g_loss_fn, d_loss_fn, g_optimizer, d_optimizer)
  File "/mnt/ssd-storage/GAN_Playground/faulttest.py", line 53, in train
    d_loss.backward()
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/_tensor.py", line 492, in backward
    torch.autograd.backward(
  File "/home/jon/anaconda3/envs/pytorch/lib/python3.11/site-packages/torch/autograd/__init__.py", line 251, in backward
    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [2, 4]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Notably, detaching the output of the generator before passing it into the discriminator “fixes” the error in my case (the commented-out WORKS line above). As far as I can tell, the in-place operation the error complains about is g_optimizer.step() itself: it updates the generator's weights in place, and d_loss.backward() then tries to backpropagate through the retained generator graph, which still expects the pre-step weight values. The [torch.FloatTensor [2, 4]] in the message matches the shape of the first nn.Linear's weight. Detaching cuts the discriminator's graph off from the generator, so those stale weights are never revisited.
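For anyone hitting the same thing, here is a sketch of the reordered training step that avoids the error in my reduced example (same names and config as faulttest.py above; note my toy generator loss doesn't involve the discriminator, so this isn't a full GAN update). The idea is to update the discriminator on a detached copy first, and run the generator's backward() before g_optimizer.step() modifies any weights in place:

# Reordered training step (same setup as faulttest.py above).
x = torch.rand((config['batch_size'], config['inp_x_size']), dtype=torch.float32)
y = torch.rand((config['batch_size'], 1), dtype=torch.float32)
g_forward = generator(x)

# Discriminator update on a detached copy, so its backward pass
# never reaches into the generator's graph.
d_score = discriminator(g_forward.detach())
d_loss = d_loss_fn(d_score, y)
d_optimizer.zero_grad()
d_loss.backward()
d_optimizer.step()

# Generator update: backward() runs before g_optimizer.step()
# touches the generator's weights, so no retain_graph is needed.
g_loss = g_loss_fn(g_forward, x)
g_optimizer.zero_grad()
g_loss.backward()
g_optimizer.step()

The general rule seems to be that optimizer.step() is the in-place modification, so any backward() that still needs the old parameter values has to run before the corresponding step().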