I have a problem using ELU, SELU, Sigmoid, and Tanh with a residual connection (ReLU and PReLU work fine).
Here is the code:
import torch.nn as nn
import math
import torch.nn.init as init

# Residual Block
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu1 = nn.ELU(1.0, False)  # alpha=1.0, inplace=False
        self.conv2 = nn.Conv2d(planes, inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu2 = nn.ELU(1.0, False)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out += residual  # in-place add; removing this line makes backward() work
        return out

if __name__ == "__main__":
    import torch
    from torch.autograd import Variable

    m = BasicBlock(3, 32).float()
    data = Variable(torch.Tensor(16, 3, 112, 96).float())
    feat = m(data)
    loss = feat.sum()
    print(loss)
    loss.backward()
Traceback (most recent call last):
  File "grad_problem.py", line 42, in <module>
    loss.backward()
  File "/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py", line 156, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
  File "/usr/local/lib/python2.7/dist-packages/torch/autograd/__init__.py", line 98, in backward
    variables, grad_variables, retain_graph)
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
If I remove the line out += residual, everything works again. I can't figure out what is wrong; ELU, for example, is explicitly set to non-inplace computation.
PS. I found that using torch.add instead of += makes it work again, so maybe there is an issue with the in-place addition.
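For reference, here is a minimal sketch of the forward pass with the out-of-place addition that avoids the error for me (assuming torch is imported at module level rather than inside the __main__ block):

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        # Out-of-place add: allocates a new tensor instead of mutating `out`,
        # whose stored value the ELU backward pass presumably still needs.
        out = torch.add(out, residual)  # equivalently: out = out + residual
        return out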