Hey There, pytorch master passing by!, can you help me?
Below is the neural network model I made, a terrible error occurred while backpropagating.
class ConvFeatureExtractor(nn.Module):
def __init__(self, input_channel = 3, output_channel = 32):
super(ConvFeatureExtractor, self).__init__()
self.output_channel = [4, 8, 16, output_channel]
self.ConvNet = nn.Sequential(nn.Conv2d(input_channel, self.output_channel[0], kernel_size = 11, stride = 3, padding = 1),
nn.ReLU(inplace = True),
nn.MaxPool2d(3, 3), # 4x56x56
nn.Conv2d(self.output_channel[0], self.output_channel[1], 3, 1, 1),
nn.ReLU(True),
nn.MaxPool2d(2, 2), # 8x28x28
nn.Conv2d(self.output_channel[1], self.output_channel[2], 3, 1, 1), # 16x28x28
nn.ReLU(True),
nn.Conv2d(self.output_channel[2], self.output_channel[2], 3, 1, 1),
nn.MaxPool2d(2, 2), # 16x14x14
nn.Conv2d(self.output_channel[2], self.output_channel[3], 3, 1, 1, bias = False),
nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True), # 32x14x14
nn.Conv2d(self.output_channel[3], self.output_channel[3], 3, 1, 1, bias = False),
nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
nn.MaxPool2d(2, 2), # 32x7x7
nn.Conv2d(self.output_channel[3], self.output_channel[3], 3, 1, 0), #32x5x5
nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
nn.MaxPool2d(5, 5) # 32x1x1
)
def forward(self, x):
x = self.ConvNet(x) # [batch, channel, w, h]
x = x.view(-1, self.output_channel[3]) # [batch, 32]
return x
class RecurrentLayer(nn.Module):
def __init__(self, input_size = 32, hidden_size = 128, output_size = 1):
super(RecurrentLayer, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.rnn = nn.LSTMCell(input_size = self.input_size, hidden_size = self.hidden_size)
self.hx_outputs = []
self.cx_outputs = []
self.out = nn.Linear(self.hidden_size, self.output_size)
def forward(self, x):
if len(self.hx_outputs) == 0 & len(self.cx_outputs) == 0:
hx, cx = self.rnn(x) # h0, c0 = 0
self.hx_outputs.append(hx)
self.cx_outputs.append(cx)
else:
hx = self.hx_outputs[-1]
cx = self.cx_outputs[-1]
hx, cx = self.rnn(x, (hx, cx))
self.hx_outputs.append(hx)
self.cx_outputs.append(cx)
prediction = self.out(self.hx_outputs[-1])
return prediction
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.FeatureExtraction = ConvFeatureExtractor()
self.rnn = RecurrentLayer()
def forward(self, input): # input : [batch, step, channel, w, h]
input = input.reshape(-1, 5, 3, 512, 512)
prediction_outputs = []
for i in range(5):
image = input[:, i, :, :, :] # [batch, channel, w, h]
visual_feature = self.FeatureExtraction(image)
prediction = self.rnn(visual_feature)
prediction_outputs.append(prediction)
return prediction_outputs[-1]
For example, if you put dimension of x = torch.randn(bactch_size, 5, 3, 512, 512) into the model, you get this
model = Model()
x = torch.randn(10, 5, 3, 512, 512)
model(x)
tensor([[0.0571],
[0.0822],
[0.0839],
[0.1006],
[0.0962],
[0.1026],
[0.0578],
[0.0550],
[0.0792],
[0.1104]], grad_fn=AddmmBackward)
However, when I try to calculate the loss and backpropagation using “loss.backward”, an error appears as shown below.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [128, 512]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
I searched here and there on the pytorch forum, I realized that there is an inplace modification? in the variable, so an error occurs during the gradient calculation process.
and I was told to use “with torch.autograd.set_detect_anomaly(True):” to find the part where the error occurred and to add “.clone()” to solve the problem.
It’s so frustrating that I don’t know how to do it. Which part of my model is broken?