Hey,
I get the error below, and although I know how to work around it (explained below), I wonder why it happens, so maybe someone can explain what is going on.
The issue is that when using an nn.Tanh activation, a subsequent in-place addition throws the following error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [3, 4, 32, 32]], which is output 0 of TanhBackward, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
Replacing the Tanh with ReLU (while everything else stays the same) does not trigger the error, so I wonder what is specific to Tanh that causes it.
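Poking at it a bit: the "version 1 / expected version 0" wording in the message seems to come from an internal per-tensor version counter. A tiny standalone check (this uses the undocumented _version attribute, so take it with a grain of salt):

import torch

x = torch.randn(3, requires_grad=True)
y = torch.tanh(x)
print(y._version)  # 0 -- fresh output of tanh
y += 1             # the in-place op
print(y._version)  # 1 -- matches "is at version 1; expected version 0"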
Another way to avoid it is to replace the in-place addition
x += down1
with its out-of-place equivalent
x = x + down1
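Here is a stripped-down snippet, without any model, that seems to reproduce the same behavior. My guess is that tanh's backward reuses its own output (d tanh(x)/dx = 1 - y**2), so mutating that output in place invalidates what autograd saved, but that is exactly the part I would like someone to confirm:

import torch

x = torch.randn(3, requires_grad=True)
y = torch.tanh(x)   # autograd (presumably) saves y for the backward pass
y += 1              # in-place add on the saved tensor
try:
    y.sum().backward()
except RuntimeError as e:
    print(e)        # same "modified by an inplace operation" error

x = torch.randn(3, requires_grad=True)
z = torch.tanh(x) + 1  # out-of-place: tanh's output is left alone
z.sum().backward()     # works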
A full code example that reproduces the error is below, with comments on the two important lines.
Does someone know why this is happening? Maybe it's just a bug? If it is, I'll open a ticket.
Thank you in advance
import torch
import torch.nn as nn
import torch.optim as optim
class DownBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super(DownBlock, self).__init__()
        self.seq = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias),
            nn.BatchNorm2d(out_channels),
            nn.Tanh(),  # <--- with ReLU it's fine, but Tanh throws the exception
        )

    def forward(self, x):
        return self.seq(x)
class UpBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias=False):
        super(UpBlock, self).__init__()
        self.seq = nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, output_padding, bias=bias),
            nn.BatchNorm2d(out_channels),
            nn.Tanh(),
        )

    def forward(self, x):
        return self.seq(x)
class Test(nn.Module):
    def __init__(self):
        super(Test, self).__init__()
        self.frame_down1 = DownBlock(1, 4, 3, 2, 1)
        self.frame_down2 = DownBlock(4, 8, 3, 2, 1)
        self.up1 = UpBlock(8, 4, 3, 2, 1, 1)
        self.up2 = UpBlock(4, 1, 3, 2, 1, 1)

    def forward(self, x):
        down1 = self.frame_down1(x)
        down2 = self.frame_down2(down1)
        x = self.up1(down2)
        x += down1  # <--- x = x + down1 is fine, but x += down1 throws the exception
        x = self.up2(x)
        return x
criterion = nn.MSELoss()
model = Test()
optimizer = optim.Adam(model.parameters())
model.train()

x = torch.rand(3, 1, 64, 64)
y = model(x)
loss = criterion(y, x)  # reconstruction loss against the input
loss.backward()  # <--- the RuntimeError is raised here
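And in case it helps with debugging, enabling anomaly detection as the error's hint suggests makes the backward error also print the forward traceback of the operation whose gradient failed (the Tanh here), which is how I narrowed it down to this block:

torch.autograd.set_detect_anomaly(True)  # per the hint in the error message

model = Test()
x = torch.rand(3, 1, 64, 64)
y = model(x)
loss = criterion(y, x)
loss.backward()  # now also reports where the failing op ran in forward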