I can't seem to remove all attempts at in-place computation, which is something my algorithm simply cannot tolerate. I am relatively new to PyTorch, so I'm not aware of everything autograd does under the hood. Any help in removing the remaining in-place operations would be appreciated.
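To make sure we're talking about the same failure class, here is a minimal standalone sketch (not my model; w is just an illustrative tensor) of the kind of error I keep hitting:

import torch

w = torch.ones(3, requires_grad=True)
y = w * w              # autograd saves w here, since dy/dw needs its current value
with torch.no_grad():
    w += 1.0           # the in-place update bumps w's version counter
y.sum().backward()     # RuntimeError: one of the variables needed for gradient
                       # computation has been modified by an inplace operation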
I am building the model like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter

class OdlVGG16(nn.Module):
    def __init__(self, num_classes, input_shape, batch_size, learning_rate):
        super(OdlVGG16, self).__init__()
        self.layers = []
        self.classifiers = []
        self.optimizers = []
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        input_features = input_shape[0]
        feature_maps = [64, 64]  # , 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]
        for i, feature_map in enumerate(feature_maps):
            # Make each convolutional layer
            if i == 1 or i == 3 or i == 6 or i == 9 or i == 12:
                input_shape[1] /= 2
                input_shape[2] /= 2
            self.layers.append(nn.Conv2d(in_channels=input_features,
                                         out_channels=feature_map,
                                         kernel_size=(3, 3),
                                         stride=(1, 1),
                                         padding=(1, 1)))
            # Make a trainable classifier for each convolutional layer
            self.classifiers.append(nn.Sequential(
                nn.Linear(int(feature_map * input_shape[1] * input_shape[2]), 100),
                nn.ReLU(inplace=False),
                nn.Dropout(inplace=False),
                nn.Linear(100, self.num_classes),
            ))
            input_features = feature_map
            self.optimizers.append(torch.optim.Adam(self.classifiers[i].parameters(), lr=self.learning_rate))
        self.max_layers = len(self.layers)
        self.alpha = Parameter(torch.full((self.max_layers, 1), 1 / (self.max_layers + 1)))
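For completeness, this is roughly how I construct the model; the concrete numbers here are placeholders rather than my exact configuration (note that input_shape has to be a mutable list, since __init__ halves its height/width entries in place):

model = OdlVGG16(num_classes=10,           # 10 matches the [100, 10] weight in the trace below
                 input_shape=[3, 32, 32],  # placeholder [channels, height, width]
                 batch_size=64,            # placeholder
                 learning_rate=1e-3)       # placeholder
criterion = nn.CrossEntropyLoss()          # assumption: a criterion taking logits and long labels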
The forward and backward passes are computed as shown below; update_weights is called once per iteration from the main section of the code (a simplified sketch of that call site follows the methods).
def forward(self, x):
    layer_connections = []
    classifier_connections = []
    layer_connections.append(F.relu(self.layers[0](x), inplace=False))
    for i in range(1, self.max_layers):
        if i == 1 or i == 3 or i == 6 or i == 9 or i == 12:
            layer_connections.append(F.max_pool2d(F.relu(self.layers[i](layer_connections[i-1]), inplace=False), 2))
        else:
            layer_connections.append(F.relu(self.layers[i](layer_connections[i-1]), inplace=False))
    for i in range(self.max_layers):
        classifier_connections.append(self.classifiers[i](layer_connections[i].view(layer_connections[i].size(0), -1)))
    classifier_all_layers = torch.stack(classifier_connections)
    return classifier_all_layers

def zero_grad(self):
    for i in range(self.max_layers):
        self.optimizers[i].zero_grad()
        self.layers[i].weight.grad.data = torch.zeros(self.layers[i].weight.grad.data.size())
        self.layers[i].bias.grad.data = torch.zeros(self.layers[i].bias.grad.data.size())
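(In zero_grad I assign fresh zero tensors to the .grad fields instead of zeroing them in place, as part of my attempt to get rid of in-place writes.)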
def update_weights(self, inputs, labels, criterion):
    outputs_per_layer = self(inputs)
    loss_per_layer = []
    for output in outputs_per_layer:
        loss = criterion(output.view(self.batch_size, self.num_classes), labels.view(self.batch_size).long())
        loss_per_layer.append(loss)
    w = [None] * len(loss_per_layer)
    b = [None] * len(loss_per_layer)
    with torch.no_grad():
        for i in range(len(loss_per_layer)):
            loss_per_layer[i].backward(retain_graph=True)
            self.optimizers[i].step()
            # self.classifiers[i].weight.data -= self.learning_rate * self.alpha[i] * self.classifiers[i].weight.grad.data
            # self.classifiers[i].bias.data -= self.learning_rate * self.alpha[i] * self.classifiers[i].bias.grad.data
            for j in range(i + 1):
                if w[j] is None:
                    w[j] = self.alpha[i] * self.layers[j].weight.grad.data
                    b[j] = self.alpha[i] * self.layers[j].bias.grad.data
                else:
                    w[j] += self.alpha[i] * self.layers[j].weight.grad.data
                    b[j] += self.alpha[i] * self.layers[j].bias.grad.data
            self.zero_grad()
        for i in range(len(loss_per_layer)):
            self.layers[i].weight.data -= self.learning_rate * w[i]
            self.layers[i].bias.data -= self.learning_rate * b[i]
        for i in range(len(loss_per_layer)):
            self.alpha[i] *= torch.pow(self.b, loss_per_layer[i])
            self.alpha[i] = torch.max(self.alpha[i], self.s / self.max_layers)
        self.alpha = Parameter(self.alpha / torch.sum(self.alpha), requires_grad=False)
    real_output = torch.sum(torch.mul(
        self.alpha.view(self.max_layers, 1).repeat(1, self.batch_size).view(
            self.max_layers, self.batch_size, 1), outputs_per_layer), 0)
    loss = criterion(real_output.view(self.batch_size, self.num_classes), labels.view(self.batch_size).long())
    return loss
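The relevant call site in train.py is essentially this (simplified; train_loader stands in for my actual data loading):

torch.autograd.set_detect_anomaly(True)  # enabled, which is why the trace includes the forward call
for inputs, labels in train_loader:
    loss = model.update_weights(inputs, labels, criterion)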
The stack trace (with torch.autograd.set_detect_anomaly(True) enabled) is as follows:
C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\autograd\__init__.py:130: UserWarning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\main.py", line 157, in <module>
main(arguments)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\main.py", line 86, in main
train(args, model, train_val_list)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\train.py", line 36, in train
loss = model.update_weights(inputs, labels, criterion)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\models.py", line 122, in update_weights
outputs_per_layer = self(inputs)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\models.py", line 110, in forward
classifier_connections.append(self.classifiers[i](layer_connections[i].view(layer_connections[i].size(0),-1)))
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\container.py", line 117, in forward
input = module(input)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\linear.py", line 93, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\functional.py", line 1690, in linear
ret = torch.addmm(bias, input, weight.t())
(Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward(
Traceback (most recent call last):
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\main.py", line 157, in <module>
main(arguments)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\main.py", line 86, in main
train(args, model, train_val_list)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\train.py", line 36, in train
loss = model.update_weights(inputs, labels, criterion)
File "C:\Users\fvanbeer\Documents\GitHub\msc\code\models.py", line 131, in update_weights
loss_per_layer[i].backward(retain_graph=True)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\tensor.py", line 221, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "C:\Users\fvanbeer\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\autograd\__init__.py", line 130, in backward
Variable._execution_engine.run_backward(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [100, 10]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!