I use the following class as part of my VAE model.
class AxialAttentionUnet(nn.Module):
def __init__(self, block, layers, num_classes=2, zero_init_residual=True,
groups=8, width_per_group=64, replace_stride_with_dilation=None,
s=0.125, img_size = 128, imgchan = 3 , features=[ 64, 128, 256, 512]):
super(AXIAL_ATTENTION_UNET, self).__init__()
self.features = features
self.inplanes = int(features[0] * s)
self.dilation = 1
if replace_stride_with_dilation is None:
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.relu = nn.ReLU(inplace=True)
down_layers = nn.ModuleList()
down_layers.append(nn.Conv2d( imgchan, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False))
down_layers.append(nn.BatchNorm2d(self.inplanes))
down_layers.append(self.relu)
down_layers.append(nn.Conv2d(self.inplanes, features[1], kernel_size=3, stride=1, padding=1, bias=False))
down_layers.append(nn.BatchNorm2d(features[1]))
down_layers.append(self.relu)
down_layers.append(nn.Conv2d(features[1], self.inplanes, kernel_size=3, stride=1, padding=1, bias=False))
down_layers.append(nn.BatchNorm2d(self.inplanes))
down_layers.append(self.relu)
self.down = nn.Sequential(*down_layers)
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.encoder_layer = nn.ModuleList()
ks = img_size//2
#three layers
for i in range(1,len(features)):
if i==0:
self.encoder_layer.append( self._make_layer(block, int(features[i] * s), layers[i-1], kernel_size= ks))
else :
self.encoder_layer.append( self._make_layer(block, int(features[i] * s), layers[i-1], stride=2, kernel_size=ks,
dilate=replace_stride_with_dilation[i-2]))
ks=ks//2
# Decoder
self.decoder_layer = nn.ModuleList()
self.decoder_layer.append(nn.Conv2d(int(features[-1] *2*s), int(features[-1]*2*s), kernel_size=3, stride=2, padding=1))
self.decoder_layer.append(nn.Conv2d(int(features[-1] *2*s), int(features[-1]*s), kernel_size=3, stride=1, padding=1))
for idx in reversed( range(1, len(features))):
if idx > 1:
self.decoder_layer.append( nn.Conv2d(int(features[idx]*s) , int(features[idx-1]*s), kernel_size=3, stride=1, padding=1))
else:
self.decoder_layer.append( nn.Conv2d(int(features[idx]*s) , num_classes, kernel_size=1, stride=1, padding=0))
self.soft = nn.Softmax(dim=1)
self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
self.to(device=self.device)
def _make_layer(self, block, planes, blocks, kernel_size=56, stride=1, dilate=False):
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride =stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, groups=self.groups,
base_width=self.base_width, dilation=previous_dilation,
kernel_size=kernel_size))
self.inplanes = planes * block.expansion
if stride != 1:
kernel_size = kernel_size // 2
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
kernel_size=kernel_size))
return nn.Sequential(*layers)
def forward(self, x):
x = self.down(x)
# x = self.maxpool(x)
# pdb.set_trace()
x1 = self.encoder_layer[0](x)
# print(x1.shape)
x2 = self.encoder_layer[1](x1)
# print(x2.shape)
x3 = self.encoder_layer[2](x2)
x = self.relu(F.interpolate(self.decoder_layer[0](x3) , scale_factor=(2,2), mode ='bilinear'))
x = torch.add(x.clone(), x3)
x = self.relu(F.interpolate(self.decoder_layer[1](x) , scale_factor=(2,2), mode ='bilinear'))
x = torch.add(x.clone(), x2)
x = self.relu(F.interpolate(self.decoder_layer[2](x) , scale_factor=(2,2), mode ='bilinear'))
x = torch.add(x.clone(), x1)
x = self.relu(F.interpolate(self.decoder_layer[3](x) , scale_factor=(2,2), mode ='bilinear'))
x = self.decoder_layer[4](self.relu(x))
# print(x.shape)
# end of full image training
return x3, x
when I run the code, I would get this error message:
File "/home/utils.py", line 799, in forward
x = self.relu(F.interpolate(self.decoder_layer[3](x) , scale_factor=(2,2), mode ='bilinear'))
File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 102, in forward
return F.relu(input, inplace=self.inplace)
File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1296, in relu
result = torch.relu_(input)
(function _print_stack)
Traceback (most recent call last):
File "Hierarchical_VAE.py", line 1586, in <module>
average_epoch_loss, elbo, wasserstein_loss, latent_dimension_kld = train(epoch)
File "Hierarchical_VAE.py", line 1421, in train
loss_dict["WAE-GP"].backward()
File "/h/anaconda3/lib/python3.8/site-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/h/anaconda3/lib/python3.8/site-packages/torch/autograd/__init__.py", line 147, in backward
Variable._execution_engine.run_backward(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor
[100, 16, 48, 48]], which is output 0 of ReluBackward1, is at version 2; expected version 1 instead. Hint: the backtrace further above
shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
I will appreciate if someone can point out how I can fix this error message. Thanks.