Hello! I hope it’s ok I’m asking, but I’m having troubles with my nn code. It seems as if the loss dosen’t decrease and when running in debug mode I encounter this problem:
Exception has occurred: RuntimeError
one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [10, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
File “…/FinalProject/trip.py”, line 167, in
loss.backward(retain_graph=True)
I’ve been searching and reading online answers and tried many things but can’t figure this out. I’d love to get your help.
The whole traceback:
[W python_anomaly_mode.cpp:104] Warning: Error detected in AddmmBackward. Traceback of forward call that caused the error:
File “…/anaconda3/lib/python3.8/runpy.py”, line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File “…/anaconda3/lib/python3.8/runpy.py”, line 87, in _run_code
exec(code, run_globals)
File “…/.vscode-server/extensions/ms-python.python-2021.9.1191016588/pythonFiles/lib/python/debugpy/main.py”, line 45, in
cli.main()
File “/…/.vscode-server/extensions/ms-python.python-2021.9.1191016588/pythonFiles/lib/python/debugpy/…/debugpy/server/cli.py”, line 444, in main
run()
File “…/.vscode-server/extensions/ms-python.python-2021.9.1191016588/pythonFiles/lib/python/debugpy/…/debugpy/server/cli.py”, line 285, in run_file
runpy.run_path(target_as_str, run_name=compat.force_str(“main”))
File “/…/anaconda3/lib/python3.8/runpy.py”, line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File “/…/anaconda3/lib/python3.8/runpy.py”, line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File “/…/anaconda3/lib/python3.8/runpy.py”, line 87, in _run_code
exec(code, run_globals)
File “…/PycharmProjects/FinalProject/trip.py”, line 156, in
out_q,out_p,out_n = model(query,pos,neg) # triplets and anchors into the nn
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 889, in _call_impl
result = self.forward(*input, **kwargs)
File “/…/PycharmProjects/FinalProject/trip.py”, line 94, in forward
out_n = self.net(neg)
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 889, in _call_impl
result = self.forward(*input, **kwargs)
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/modules/container.py”, line 119, in forward
input = module(input)
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 889, in _call_impl
result = self.forward(*input, **kwargs)
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/modules/linear.py”, line 94, in forward
return F.linear(input, self.weight, self.bias)
File “/…/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py”, line 1753, in linear
return torch._C._nn.linear(input, weight, bias)
(function _print_stack)
Code attached here: It’s supposed to train on triplets I’ve generated and calculate triplet loss.
==================================
Neural Network
==================================
Define the Network Class
=========================
class MyNetwork(nn.Module):
def __init__(self):
# call constructor from superclass
super().__init__()
# define network layers
self.net = nn.Sequential(
# Hidden Layer 1
nn.Linear(num_of_features, 100),
nn.ReLU(),
# Hidden Layer 2
nn.Linear(100, 10),
nn.ReLU(),
# Output Layer
nn.Linear(10, 1)
)
def forward(self, query, pos, neg):
out_q = self.net(query)
out_p = self.net(pos)
out_n = self.net(neg)
out_q = torch.clone(out_q)
out_p = torch.clone(out_p)
out_n = torch.clone(out_n)
return out_q,out_p,out_n
Instantiate the model and send to cuda device:
---------------------------------------------
model = MyNetwork()
model.to(device)
Loss Criterion
================
triplet_loss_fn = nn.TripletMarginLoss(margin=1.0, p=2)
Optimization
=============
optimizer type:
---------------
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)#,nesterov=True)
=========================
Train phase
=========================
params = list(model.parameters())
print(len(params))
train_loader = DataLoader(train_data, batch_size=batch_size,shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size,shuffle=True)
train_losses, train_accuracy, val_losses, val_accuracy = ([] for i in range(4)) # create four empty lists
model.train()
triplet_train_losses=[]
epochs=20
print(“Starting Train Loop”)
for epoch in range(epochs): # loop over the dataset multiple times
loss = 0.0
batches_acc = 0
triplets_num = 0
loss_tri = 0
loss_phy = 0
# iterate over the data
for batch_idx, data in enumerate(train_loader):
# data = data.to(device) # move data to the GPU (when using a GPU)
#features = (data[:,:,:-1].float()).to(device)
features = torch.clone(data[:,:,:-1]).float().to(device)
labels = torch.clone(data[:,0,-1]).to(device)
query = torch.clone(features[:,0,:]).to(device)
pos = torch.clone(features[:,1,:]).to(device)
neg = torch.clone(features[:,2,:]).to(device)
# Forward pass:
out_q,out_p,out_n = model(query,pos,neg) # triplets and anchors into the nn
out_q = torch.clone(out_q).to(device)
out_p = torch.clone(out_p).to(device)
out_n = torch.clone(out_n).to(device)
# compute loss
loss = torch.clone(loss + triplet_loss_fn(out_q,out_p,out_n))
# zero the parameter gradients
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
params = list(model.parameters())
grads0=(list(model.parameters())[0].grad)
grads1=(list(model.parameters())[1].grad)
grads2=(list(model.parameters())[2].grad)
grads3=(list(model.parameters())[3].grad)
grads4=(list(model.parameters())[4].grad)
grads5=(list(model.parameters())[5].grad)
# Normalizing the loss by the total number of train batches
num_batches = len(train_loader)
train_losses.append(loss/num_batches)
print("Epoch: {0} |loss: {1}% |".format(epoch+1, train_losses[-1]))
Thank you very much!!!