Here is my model. I believe I do not use any in-place operations in my code.
import torch
import torch.nn as nn
class DIAMLayer(nn.Module):
    """Two-stage gated attention update of a memory tensor.

    The memory ``m`` first attends over the node features of ``p`` and is
    blended with the attention output through a learned sigmoid gate; the
    blended result then attends over the node features of ``g`` and is
    gated a second time. One ``nn.MultiheadAttention`` instance is shared by
    both stages, while each stage has its own pair of gate projections.

    Args:
        in_features: Embedding size, used for the attention module and for
            every gate projection (all are ``in_features -> in_features``).
        head_num: Number of attention heads.
    """

    def __init__(self, in_features, head_num):
        super().__init__()
        self.in_features = in_features
        self.head_num = head_num

        # Attention shared by the pattern stage and the graph stage.
        self.multihead = nn.MultiheadAttention(
            embed_dim=in_features,
            num_heads=head_num,
            dropout=0.2,
        )
        self.sigmoid = nn.Sigmoid()

        # Gate projections: u* acts on the query side, v* on the attended
        # side; *p is used in the first (p) stage, *g in the second (g) stage.
        self.up_linear = nn.Linear(in_features, in_features)
        self.vp_linear = nn.Linear(in_features, in_features)
        self.ug_linear = nn.Linear(in_features, in_features)
        self.vg_linear = nn.Linear(in_features, in_features)

    def _attend_and_gate(self, query, context, query_proj, attended_proj):
        """Attend ``query`` over ``context`` and gate-blend the two tensors.

        Returns ``gate * query + (1 - gate) * attended`` where ``gate`` is
        the sigmoid of ``query_proj(query) + attended_proj(attended)``.
        """
        attended, _ = self.multihead(query, context, context)
        gate = self.sigmoid(query_proj(query) + attended_proj(attended))
        return gate * query + (1 - gate) * attended

    def forward(self, m, p, g):
        """Update ``m`` using ``p`` and then ``g``.

        Args:
            m: Memory tensor used as the attention query.
                NOTE(review): presumably shaped (length, 1, in_features) to
                line up with the unsqueezed node features -- confirm.
            p: Object exposing ``ndata['h']`` (presumably a DGL graph holding
                the pattern's node features -- verify against caller).
            g: Object exposing ``ndata['h']``, used in the second stage.

        Returns:
            The memory tensor after both gated attention stages.
        """
        # Insert a new axis at position 1 so the node features can serve as
        # key/value for the attention module.
        pattern_feats = p.ndata['h'].unsqueeze(1)
        graph_feats = g.ndata['h'].unsqueeze(1)

        pattern_state = self._attend_and_gate(
            m, pattern_feats, self.up_linear, self.vp_linear
        )
        return self._attend_and_gate(
            pattern_state, graph_feats, self.ug_linear, self.vg_linear
        )
After following the hints in the error message, I got the final error message below:
Warning: Error detected in MmBackward. Traceback of forward call that caused the error:
File "/Users/empramsesii/Code/structDetect/new_train.py", line 74, in <module>
train_process(args.num_graphs, args.batch_size, args.set_size, args.use_record)
File "/Users/empramsesii/Code/structDetect/new_train.py", line 39, in train_process
m = model(m, pattern, graph)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/Users/empramsesii/Code/structDetect/graph/diam.py", line 29, in forward
z = self.ug_linear(sp) + self.vg_linear(sg)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/nn/functional.py", line 1612, in linear
output = input.matmul(weight.t())
(print_stack at ../torch/csrc/autograd/python_anomaly_mode.cpp:60)
Traceback (most recent call last):
File "/Users/empramsesii/Code/structDetect/new_train.py", line 74, in <module>
train_process(args.num_graphs, args.batch_size, args.set_size, args.use_record)
File "/Users/empramsesii/Code/structDetect/new_train.py", line 42, in train_process
loss.backward(retain_graph=True)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/tensor.py", line 198, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torch/autograd/__init__.py", line 100, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [17, 17]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
This is only one of my experiments with this problem, but all of them indicate that the root of the problem is in torch.nn.functional.linear. I have tried rewriting this function, but that never worked.
I tried downgrading torch to version 1.4, and the problem disappeared. However, I still need to call backward as loss.backward(retain_graph=True).