# In-place operation keeps happening

Hi,

I don’t understand how to deal with in-place operations while guaranteeing that autograd works properly. In my code, I first define 3 MLPs with the same structure, then use them to modify tensors.
I’m using `tensor.reshape` to avoid creating new tensors, but in-place operations keep happening.
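
For reference, `reshape` on a contiguous tensor returns a view that shares storage with the source, so writing into the reshaped tensor mutates the original in place as well. A minimal sketch (with made-up shapes) to check this:

``````
import torch

M, K, D = 3, 4, 5
Mesg_ue = torch.rand(K, M, D)
Mesg_bs = Mesg_ue.reshape(M, K, D)
# Same storage: no copy was made, so slice-assigning into Mesg_bs
# also modifies Mesg_ue in place.
print(Mesg_bs.data_ptr() == Mesg_ue.data_ptr())  # True
``````

Note also that `reshape` only reinterprets the flat memory rather than swapping axes; if the intent is to swap the K and M dimensions, `permute(1, 0, 2)` does that (though it also returns a view, so the in-place issue would remain).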

``````
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [5, 2, 20]], which is output 0 of torch::autograd::CopySlices, is at version 20; expected version 10 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
``````
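
The same error can be reproduced in isolation: slice assignment records a `CopySlices` node and bumps the tensor's version counter, so any value that autograd saved earlier for backward becomes stale. A minimal sketch:

``````
import torch

a = torch.rand(3, requires_grad=True)
b = a.clone()
c = b * b           # autograd saves b for the backward of the multiply
b[0] = 0.0          # in-place write: b's version counter is incremented
c.sum().backward()  # RuntimeError: ... modified by an inplace operation
``````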

``````
def forward(self, F_ue, Mesg_ue, E, P, Noise):
    '''
    :param F_ue: of size [2MN,K], randomly generated at the first iteration
    :param Mesg_ue: of size [K,M,2MN], randomly generated at the first iteration
    :param E: of size [M,K,2MN]
    :param P: of size [M,1]
    :param Noise: of size [K,1]
    :return: F_ue_update, agg_ue
    '''
    M = P.size(0)
    K = Noise.size(0)
    N = E.size(dim=2) // 2
    agg_ue = torch.mean(Mesg_ue, dim=0)  # of size [M,2MN]
    Mesg_bs = Mesg_ue.reshape(M, K, 2*M*N)
    for m in range(M):
        for k in range(K):
            Mesg_bs[m, k, :] = self.mlp1(E[m, k, :], agg_ue[m, :], P[m], Noise[k])  # it seems like an in-place operation happens here
            # Mesg_bs <CopySlices object at 0x2b410bc78c10>

    agg_bs = torch.mean(Mesg_bs, dim=0)  # of size [K,2MN]
    Mesg_ue = Mesg_bs.reshape(K, M, 2*M*N)
    for k in range(K):
        for m in range(M):
            Mesg_ue[k, m, :] = self.mlp2(F_ue[:, k], E[m, k, :], agg_bs[k, :], P[m], Noise[k])
            # Mesg_ue <AsStridedBackward0 object at 0x2b410bc78c70>

    for k in range(K):
        E_cat = torch.cat([E[m, k, :] for m in range(M)], dim=0)
        F_ue[:, k] = self.mlp3(E_cat, F_ue[:, k], agg_bs[k, :], P, Noise[k])
        # F_ue <CopySlices object at 0x2b410bc78c10>

    return F_ue, Mesg_ue


F_ue0 = torch.rand(2*M*N, K, requires_grad=True)
# F_ue0 <ToCopyBackward0 object at 0x2b410bc78be0>
Mesg_ue0 = torch.rand(K, M, 2*M*N, requires_grad=True)
# Mesg_ue0 <ToCopyBackward0 object at 0x2b410bc78be0>
F_ue, Mesg_ue = model(F_ue0, Mesg_ue0, E, P, Noise)
loss = Loss(F_ue)
loss.backward()
``````
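
One common workaround, sketched below with the call signatures kept from the snippet above, is to collect each per-slice result in a Python list and build the tensors out of place with `torch.stack`, so nothing autograd has saved is ever overwritten:

``````
# Inside forward, replacing the three slice-assignment loops above.
Mesg_bs = torch.stack([
    torch.stack([self.mlp1(E[m, k, :], agg_ue[m, :], P[m], Noise[k])
                 for k in range(K)])
    for m in range(M)
])                                   # [M, K, 2MN]

agg_bs = torch.mean(Mesg_bs, dim=0)  # [K, 2MN]

Mesg_ue = torch.stack([
    torch.stack([self.mlp2(F_ue[:, k], E[m, k, :], agg_bs[k, :], P[m], Noise[k])
                 for m in range(M)])
    for k in range(K)
])                                   # [K, M, 2MN]

F_ue = torch.stack([
    self.mlp3(torch.cat([E[m, k, :] for m in range(M)], dim=0),
              F_ue[:, k], agg_bs[k, :], P, Noise[k])
    for k in range(K)
], dim=1)                            # [2MN, K]
``````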

Also, since only F_ue is used in calculating the loss, I’m curious whether the parameters of mlp1 and mlp2 will be updated or not.
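
A quick way to check, after `loss.backward()`, is to see which parameters actually received a gradient (assuming no earlier backward pass or `zero_grad` has already populated `.grad`):

``````
# On the first backward pass, a parameter whose .grad is still None
# never contributed to the loss through the autograd graph.
for name, p in model.named_parameters():
    print(name, "no grad" if p.grad is None else "has grad")
``````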
Thanks for any help.

Hi @sunnylyu,

Can you share the code of the `self.mlp1` object? (And the rest of the class too, so it’s a minimal reproducible example).

The stack trace with `torch.autograd.set_detect_anomaly` enabled would be useful too!
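
In case it's useful, anomaly detection can also be scoped with the context manager (reusing the names from your snippet):

``````
import torch

# With anomaly detection on, the backward error includes a second traceback
# pointing at the forward operation that produced the offending tensor.
with torch.autograd.detect_anomaly():
    F_ue, Mesg_ue = model(F_ue0, Mesg_ue0, E, P, Noise)
    Loss(F_ue).backward()
``````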

``````
import torch
from torch.nn import Linear


class NodeUpdateLayer(torch.nn.Module):
    def __init__(self, edge_feature_size, num_antenna, num_BS):
        '''
        :param edge_feature_size: int, the dimension of edge_feature, e_mk of size 2*N
        :param num_antenna: int, number of antennas at each BS
        :param num_BS: int, number of BS
        '''
        super(NodeUpdateLayer, self).__init__()

        # MLP for generating message at BS
        self.mlp1 = torch.nn.Sequential(
            Linear(edge_feature_size + 2*num_BS*num_antenna + 2, 512),  # power and noise should also be input
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 2*num_BS*num_antenna)
        )

        # MLP for generating message at UE
        self.mlp2 = torch.nn.Sequential(
            Linear(2*num_BS*num_antenna + edge_feature_size + 2*num_BS*num_antenna + 2, 512),
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 2*num_BS*num_antenna)
        )

        # MLP for updating the UE representation
        self.mlp3 = torch.nn.Sequential(
            Linear(2*num_BS*num_antenna + num_BS*edge_feature_size + 2*num_BS*num_antenna + 2, 512),
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 512),
            torch.nn.ReLU(),
            Linear(512, 2*num_BS*num_antenna)
        )

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)
        self._wrap_parameters()
        self._init_weights()

    def _wrap_parameters(self):
        for module in self.modules():
            if isinstance(module, Linear):
                module.weight = torch.nn.Parameter(module.weight)
                module.bias = torch.nn.Parameter(module.bias)

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, Linear):
                torch.nn.init.xavier_uniform_(m.weight, gain=1.0)  # set gain to a non-zero value
                torch.nn.init.constant_(m.bias, 0.4)

    def Message_bs(self, edge_feature, agg_ue, power, noise):
        message_bs = self.mlp1(torch.cat((edge_feature, agg_ue, power, noise), 0))
        if self.device.type == "cuda":
            message_bs = message_bs.to(self.device)
        return message_bs

    def Message_ue(self, f_ue, edge_feature, agg_bs, power, noise):
        message_ue = self.mlp2(torch.cat((f_ue, edge_feature, agg_bs, power, noise), 0))
        if self.device.type == "cuda":
            message_ue = message_ue.to(self.device)
        return message_ue

    def Update(self, E_cat, f_ue, agg_bs, power, noise):
        f_ue_update = self.mlp3(torch.cat((E_cat, f_ue, agg_bs, power, noise), 0))
        if self.device.type == "cuda":
            f_ue_update = f_ue_update.to(self.device)
        return f_ue_update

    def forward(self, F_ue, Mesg_ue, Mesg_bs, E, P, Noise):
        '''
        :param F_ue: of size [2MN,K], randomly generated at the first iteration
        :param Mesg_ue: of size [K,M,2MN], randomly generated at the first iteration
        :param E: of size [M,K,2MN]
        :param P: of size [M,1]
        :param Noise: of size [K,1]
        :return: F_ue_update, agg_ue
        '''
        M = P.size(0)
        K = Noise.size(0)
        N = E.size(dim=2) // 2
        agg_ue = torch.mean(Mesg_ue, dim=0)  # of size [M,2MN]
        #Mesg_bs = Mesg_ue.reshape(M,K,2*M*N)
        for m in range(M):
            for k in range(K):
                Mesg_bs[m, k, :] = self.Message_bs(E[m, k, :], agg_ue[m, :], P[m], Noise[k])

        agg_bs = torch.mean(Mesg_bs, dim=0)  # of size [K,2MN]
        #Mesg_ue = Mesg_bs.reshape(K,M,2*M*N)
        for k in range(K):
            for m in range(M):
                Mesg_ue[k, m, :] = self.Message_ue(F_ue[:, k], E[m, k, :], agg_bs[k, :], P[m], Noise[k])