I want to use PyTorch and PyTorch-Geometric to train my graph neural network. However, it raises RuntimeError about ‘Function ScatterAddBackward’ when it executes loss.backward()
. I checked the shape of my input and output in my code but still cannot find where the problem is.
I give a demo here which can reproduce the issue. My code is executed on RTX 2080ti and windows 10 platform. The version of PyG and PyTorch are 2.0.2 and 1.9.0, respectively. Here is the RuntimeError:
Traceback (most recent call last):
File “demo.py”, line 101, in
loss.backward()
File “C:\software\Miniconda3\envs\AI\lib\site-packages\torch_tensor.py”, line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File “C:\software\Miniconda3\envs\AI\lib\site-packages\torch\autograd_init _.py”, line 147, in backward
Variable._execution_engine.run_backward(
RuntimeError: Function ScatterAddBackward returned an invalid gradient at index 1 - got [18, 1] but expected shape compatible with [18, 5]
Here is my code:
import torch
import torch_geometric
from torch_geometric.nn import MessagePassing, Linear
import torch.nn.functional as F
from torch_geometric.nn.inits import glorot
from torch_geometric.loader import NeighborLoader
class DemoConv(MessagePassing):
def __init__(self, n_num: int=3, p_num: int=2, initializer: str='glorot',
negative_slope: float=0.01):
super(DemoConv, self).__init__(aggr='add', flow='source_to_target')
self.n = n_num
self.p = p_num
self.negative_slope = negative_slope
self.n_edgL = Linear(2*n_num, n_num, weight_initializer=initializer)
self.p_edgL = Linear(2*p_num, p_num, weight_initializer=initializer)
# self.n_tot = Linear(n_num, n_num, weight_initializer=initializer)
self.n_sca = Linear(n_num, n_num, weight_initializer=initializer)
# self.p_tot = Linear(p_num, p_num, weight_initializer=initializer)
self.p_sca = Linear(n_num+p_num, p_num, weight_initializer=initializer)
def forward(self, x, edge_index, area, vol, Xts, Xss, Qext):
''' Args:
x: Size: (num_nodes, n_num + p_num)
area: Size: (num_edges, 1)
vol: Size: (num_nodes, 1)
edge_index: Size: (2, num_edges)
Xts: Size: (num_nodes, n_num + p_num)
Xss: Size: (num_nodes, n_num + n_num + p_num)
Qext: Size: (num_nodes, n_num + p_num)
'''
id_src, id_dst = edge_index # source node ID, target node ID
num_nodes = x.size(0) # the number of nodes
n_Input = torch.cat([x[id_src][:, :self.n], x[id_dst][:, :self.n]], dim=1) # Size: (num_edges, 2*n_num)
p_Input = torch.cat([x[id_src][:, self.n:], x[id_dst][:, self.n:]], dim=1) # Size: (num_edges, 2*p_num)
# InSize: (num_edges, 2*n_num) OutSize: (num_edges, n_num)
n_alpha = F.leaky_relu(self.n_edgL(n_Input), self.negative_slope)
# InSize: (num_edges, 2*p_num) OutSize: (num_edges, p_num)
p_alpha = F.leaky_relu(self.p_edgL(p_Input), self.negative_slope)
alpha = torch.cat([n_alpha, p_alpha], dim=1) # Size: (num_edges, n_num + p_num)
# print('alpha size: {} area: {}'.format(alpha.size(), area.size()))
coe_fac = alpha * area # Size: (num_edges, n_num + p_num)
coe_sum = torch.zeros([num_nodes, self.n+self.p]).to('cuda') # Size: (num_nodes, n_num + p_num)
id_dstForScatter = id_dst.view(-1, 1)
coe_sum.scatter_add_(dim=0, index=id_dstForScatter, src=coe_fac) # Size: (num_nodes, n_num + p_num)
coe_sum = coe_sum + 20.0 # avoid dividing zero
coe_x = coe_fac/coe_sum[id_dst] # Size: (num_edges, n_num + p_num)
return self.propagate(edge_index, x=x, coe_x=coe_x,
coe_sum=coe_sum, vol=vol, Xts=Xts, Xss=Xss, Qext=Qext)
def message(self, x_j, coe_x):
return -1.0*coe_x*x_j
def update(self, aggr_out, x, coe_sum, vol, Xts, Xss, Qext):
n_sterm = F.leaky_relu(self.n_sca(x[:, :self.n]*Xss[:, :self.n]), self.negative_slope) # InSize: (num_nodes, n_num)
p_sterm = F.leaky_relu(self.p_sca(x*Xss[:, self.n:]), self.negative_slope) # InSize: (num_nodes, n_num + p_num)
Q_s = torch.cat([n_sterm, p_sterm], dim=1)/coe_sum*vol # OutSize: (num_nodes, n_num + p_num)
return aggr_out + Q_s + Qext/coe_sum
class DemoNet(torch.nn.Module):
def __init__(self):
super(DemoNet, self).__init__()
self.conv = DemoConv()
def forward(self, data):
x, edge_index, area, vol, Xts, Xss, Qext = data.x, data.edge_index, data.area, data.vol, data.Xts, data.Xss, data.Qext
x = self.conv(x=x, edge_index=edge_index, area=area, vol=vol, Xts=Xts, Xss=Xss, Qext=Qext)
return x
data = torch.load('./data.pth')
trainData = NeighborLoader(data, num_neighbors=[7], batch_size=5, input_nodes=data.train_mask)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DemoNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lossFunc = torch.nn.MSELoss()
model.train()
for epoch in range(1):
for batchIdx, data in enumerate(trainData):
data = data.to(device)
optimizer.zero_grad()
out = model(data)
loss = lossFunc(out, data.y)
loss.backward()
optimizer.step()
if batchIdx == 0 :
break
I’m sorry for that .zip file is not supported for uploading here so I cannot upload the data file for the code execution.