I encounter a problem about 'Function ScatterAddBackward' when using loss.backward()

I want to use PyTorch and PyTorch-Geometric to train my graph neural network. However, it raises RuntimeError about ‘Function ScatterAddBackward’ when it executes loss.backward() . I checked the shape of my input and output in my code but still cannot find where the problem is.

I give a demo here which can reproduce the issue. My code is executed on RTX 2080ti and windows 10 platform. The version of PyG and PyTorch are 2.0.2 and 1.9.0, respectively. Here is the RuntimeError:

Traceback (most recent call last):
File “demo.py”, line 101, in
File “C:\software\Miniconda3\envs\AI\lib\site-packages\torch_tensor.py”, line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File “C:\software\Miniconda3\envs\AI\lib\site-packages\torch\autograd_init _.py”, line 147, in backward
RuntimeError: Function ScatterAddBackward returned an invalid gradient at index 1 - got [18, 1] but expected shape compatible with [18, 5]

Here is my code:

import torch
import torch_geometric
from torch_geometric.nn import MessagePassing, Linear
import torch.nn.functional as F
from torch_geometric.nn.inits import glorot
from torch_geometric.loader import NeighborLoader

class DemoConv(MessagePassing):

	def __init__(self, n_num: int=3, p_num: int=2, initializer: str='glorot',
					negative_slope: float=0.01):
		super(DemoConv, self).__init__(aggr='add', flow='source_to_target')
		self.n = n_num
		self.p = p_num
		self.negative_slope = negative_slope 									
		self.n_edgL = Linear(2*n_num, n_num, weight_initializer=initializer)
		self.p_edgL = Linear(2*p_num, p_num, weight_initializer=initializer)
		# self.n_tot = Linear(n_num, n_num, weight_initializer=initializer)
		self.n_sca = Linear(n_num, n_num, weight_initializer=initializer)	

		# self.p_tot = Linear(p_num, p_num, weight_initializer=initializer)
		self.p_sca = Linear(n_num+p_num, p_num, weight_initializer=initializer)

	def forward(self, x, edge_index, area, vol, Xts, Xss, Qext):
		''' Args:
			x: 				Size: (num_nodes, n_num + p_num)
			area: 	 		Size: (num_edges, 1)
			vol: 			Size: (num_nodes, 1)
			edge_index: 	Size: (2, num_edges)
			Xts: 			Size: (num_nodes, n_num + p_num)
			Xss: 			Size: (num_nodes, n_num + n_num + p_num)
			Qext: 			Size: (num_nodes, n_num + p_num)
		id_src, id_dst = edge_index 	# source node ID, target node ID
		num_nodes = x.size(0) 			# the number of nodes
		n_Input = torch.cat([x[id_src][:, :self.n], x[id_dst][:, :self.n]], dim=1) 	# Size: (num_edges, 2*n_num)
		p_Input = torch.cat([x[id_src][:, self.n:], x[id_dst][:, self.n:]], dim=1) 	# Size: (num_edges, 2*p_num)
		# InSize: (num_edges, 2*n_num)   OutSize: (num_edges, n_num)
		n_alpha = F.leaky_relu(self.n_edgL(n_Input), self.negative_slope)

		# InSize: (num_edges, 2*p_num)    OutSize: (num_edges, p_num)
		p_alpha = F.leaky_relu(self.p_edgL(p_Input), self.negative_slope)

		alpha = torch.cat([n_alpha, p_alpha], dim=1) 						# Size: (num_edges, n_num + p_num)
		# print('alpha size: {}    area: {}'.format(alpha.size(), area.size()))

		coe_fac = alpha * area 												# Size: (num_edges, n_num + p_num)
		coe_sum = torch.zeros([num_nodes, self.n+self.p]).to('cuda') 		# Size: (num_nodes, n_num + p_num)
		id_dstForScatter = id_dst.view(-1, 1)
		coe_sum.scatter_add_(dim=0, index=id_dstForScatter, src=coe_fac) 	# Size: (num_nodes, n_num + p_num)

		coe_sum = coe_sum + 20.0  											# avoid dividing zero 
		coe_x = coe_fac/coe_sum[id_dst] 									# Size: (num_edges, n_num + p_num)
		return self.propagate(edge_index, x=x, coe_x=coe_x, 
				coe_sum=coe_sum, vol=vol, Xts=Xts, Xss=Xss, Qext=Qext)	

	def message(self, x_j, coe_x):
		return -1.0*coe_x*x_j

	def update(self, aggr_out, x, coe_sum, vol, Xts, Xss, Qext):
		n_sterm = F.leaky_relu(self.n_sca(x[:, :self.n]*Xss[:, :self.n]), self.negative_slope) 	# InSize: (num_nodes, n_num)
		p_sterm = F.leaky_relu(self.p_sca(x*Xss[:, self.n:]), self.negative_slope) 				# InSize: (num_nodes, n_num + p_num)
		Q_s = torch.cat([n_sterm, p_sterm], dim=1)/coe_sum*vol 									# OutSize: (num_nodes, n_num + p_num)
		return aggr_out + Q_s + Qext/coe_sum

class DemoNet(torch.nn.Module):

	def __init__(self):
		super(DemoNet, self).__init__()
		self.conv = DemoConv()

	def forward(self, data):
		x, edge_index, area, vol, Xts, Xss, Qext = data.x, data.edge_index, data.area, data.vol, data.Xts, data.Xss, data.Qext
		x = self.conv(x=x, edge_index=edge_index, area=area, vol=vol, Xts=Xts, Xss=Xss, Qext=Qext)
		return x 

data = torch.load('./data.pth')
trainData = NeighborLoader(data, num_neighbors=[7], batch_size=5, input_nodes=data.train_mask)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DemoNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lossFunc = torch.nn.MSELoss()

for epoch in range(1):
	for batchIdx, data in enumerate(trainData):
		data = data.to(device)
		out = model(data)
		loss = lossFunc(out, data.y)
		if batchIdx == 0 :

I’m sorry for that .zip file is not supported for uploading here so I cannot upload the data file for the code execution.

Sorry to disturb you. I have found where the problem is.