RuntimeError: Invalid index in scatterAdd at /opt/conda/conda-bld/pytorch_1565287124314/work/aten/src/TH/generic/THTensorEvenMoreMath.cpp:536

Beherit666 · November 20, 2019, 6:32pm

Hi experts,
I got this RuntimeError while using PyTorch Geometric
Below is my setup and codes.

Setup:
Python 3.6.8 :: Anaconda custom (64-bit)
torch==1.2.0
torch-cluster==1.4.4
torch-geometric==1.3.1
torch-scatter==1.3.1
torch-sparse==0.4.0
torchbiggraph==1.0.0
torchvision==0.4.0a0+9232c4a

File:
map_small.txt:

FE_RC_201_0 has idx 0
FE_RC_200_0 has idx 1
FFE_RC_195_0 has idx 6
FE_RC_194_0 has idx 7
FE_RC_193_0 has idx 8
FE_RC_190_0 has idx 11
FE_RC_189_0 has idx 12
U11292 has idx 3905
U16494 has idx 8369

graph_small.txt

1 0 2
7 8 2
8 6 2
12 11 2
8369 3905 0.333333

features_le_small.csv

Unnamed: 0,inst_name,cell_name,size_x,size_y,ar,no_term,layer_term,nets,inst_box
1,1789,129,0.28800000000000003,0.5760000000000001,2.0,3.0,7,991,3454
2,1788,154,0.48,0.5760000000000001,1.2,4.0,5,5127,4747
7,1781,201,0.28800000000000003,0.5760000000000001,2.0,3.0,7,989,955
8,1780,103,0.28800000000000003,0.5760000000000001,2.0,2.0,9,9697,916
9,1779,149,0.48,0.5760000000000001,1.2,4.0,5,990,993
12,1776,4,0.384,0.5760000000000001,1.5,3.0,7,987,3141
13,1774,239,0.96,0.5760000000000001,0.6,4.0,5,9510,3200
3906,2881,126,0.28800000000000003,0.5760000000000001,2.0,3.0,7,6051,256
8370,7345,178,0.48,0.5760000000000001,1.2,5.0,3,1818,10885
Code:

#########################################################
### Headers.
#########################################################
#import networkx as nx
import time
start_time = time.time()
import sys
import re
import random
import torch
import numpy as np
import torch.nn.functional as F
from fnmatch import fnmatch 
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

#########################################################
### Variables Declaration.
#########################################################

graphFileIn = "graph_small.txt" #"graph_aes_16_dp.txt" #sys.argv[1]
mapFileIn = "map_small.txt" #"map_aes_16_dp.txt" #sys.argv[2]
featFileIn = "features_le_small.csv" #"features_encoded.csv"
run_name = "1119_test_"

lr = 1e-4
epochs = 20
weight_decay = 5e-4
#########################################################
### Read in netlist graph.
#########################################################

def read_netlist_graph(graphFileIn, mapFileIn):
    edge_v1 = []
    edge_v2 = []
    node_number = 0

    for line in open(mapFileIn, 'r'):
        line = line.split(" ")
        node_number += 1
        inst = line[0]
        num = line[3]
        #print("inst name:", inst, "num:", num)

    for line in open(graphFileIn, 'r'):
        data = line.split()
        node1 = int(data[0])
        node2 = int(data[1])
        weight = data[2]
        edge_v1.append( node1 )
        edge_v2.append( node2 )

    print("edge_v1:", len(edge_v1), "edge_v2:",len(edge_v2))
    print("Node no.",node_number)
    print("Edge1 len", len(edge_v1), "Edge2_len",len(edge_v2))
    print("edge_v1:", edge_v1, "edge_v2:", edge_v2)
    return edge_v1, edge_v2, node_number

#########################################################
### Build netlist graph in PyTorch form.
#########################################################

def build_netlist_graph(edge_v1, edge_v2, node_number):
    edge_index = torch.tensor([edge_v1,
                               edge_v2], 
                              dtype = torch.long)

    print("edge_index:", edge_index)

    i = 1
    feature_v = []

    for line in open(featFileIn, "r"):
        if "inst_name" not in line:
            line = line.rstrip("\n")
            line = line.split(",")
            node = int(line[0])-1
            feat_vec = [round(float(i), 6) for i in line[1:]]
            print("node:",node, "TYPE:", type(feat_vec), feat_vec)
            feature_v.append(feat_vec)
    print("feature_v", feature_v)
        #feature_v.append(random.sample(range(0, 2),1))

    #print("Feature length:", len(feature_v))

    ### Assign features, here assume only one feature per node.
    x = torch.tensor(feature_v, 
                     dtype = torch.float)
    print("X:", x)
    ### Randomly assign labels.
    labels = []
    labels = np.random.randint(3, size = node_number)
    y = torch.tensor(labels, dtype=torch.long)

    data = Data(x = x, 
                edge_index = edge_index,
                y = y,
                num_classes = 3,
    )

    """
    ### Print out data attibutes.
    print("\n\n\n Print out data attributes:")
    print("Data keys:",data.keys)
    print("data['x']:", data['x'])
    print("num_edges:",data.num_edges)
    print("num_nodes:",data.num_nodes)
    print("directed:", data.is_directed())
    print("data.num_node_features:",data.num_node_features)
    print("data.num_classes:",data.num_classes)
    print("data:", data, "\n\n\n")
    """
    ### Train-test split
    data.train_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
    data.train_mask[:int(0.7 * data.num_nodes)] = 1 #train only on the 70% nodes
    data.test_mask = torch.zeros(data.num_nodes, dtype=torch.uint8) #test on 30 % nodes 
    data.test_mask[- int(0.3 * data.num_nodes):] = 1

    print("\n\n\nPrint out train/test data:")
    print("data.train_mask", data.train_mask)
    print("data.test_mask", data.test_mask)

    return data

#########################################################
### Learning on Graphs
#########################################################

class Net(torch.nn.Module):

    ### Constructor
    def __init__(self):

        ### super is used to inherit from the torch.nn.module.
        super(Net, self).__init__()

        #class GCNConv(in_channels, 
        #              out_channels, improved=False, 
        #              cached=False, bias=True, **kwargs)

        self.conv1 = GCNConv(data.num_node_features, 16)
        self.conv2 = GCNConv(16, data.num_classes)


    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        #x = torch.matmul(torch.t(x),self.weight)
        #x = x.squeeze(dim=-1)
        print("x:",x.size(), "e_idx", edge_index.size())
 
        print("x_shape:", x.shape)
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training = self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

def graph_learning(data):

    ### Setup the model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)

    #data = data #dataset[0].to(device)
    data = data.to(device)

    optimizer = torch.optim.Adam( model.parameters(), 
                                  lr = lr, 
                                  weight_decay = weight_decay)
    
    for epoch in range(epochs):

        train_loss = model.train()
        optimizer.zero_grad()
        out = model(data)
        
        loss = F.nll_loss(out[data.train_mask], 
                          data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        model.eval()
        _, pred = model(data).max(dim = 1)

        correct = float (
            pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())

        acc = correct / data.test_mask.sum().item()
        print("\n\nEpoch:", epoch, "Loss:", loss, 'Accuracy: {:.4f}'.format(acc))
    

#########################################################
### Main function.
#########################################################

if __name__ == '__main__':

    (edge_v1, 
    edge_v2, 
    node_number) = read_netlist_graph(graphFileIn, mapFileIn)

    data = build_netlist_graph(edge_v1, edge_v2, node_number)

    graph_learning(data)
    
    print("--- %s seconds ---" % (time.time() - start_time))

Output

edge_v1: 5 edge_v2: 5
Node no. 9
Edge1 len 5 Edge2_len 5
edge_v1: [1, 7, 8, 12, 8369] edge_v2: [0, 8, 6, 11, 3905]
edge_index: tensor([[   1,    7,    8,   12, 8369],
        [   0,    8,    6,   11, 3905]])
node: 0 TYPE: <class 'list'> [1789.0, 129.0, 0.288, 0.576, 2.0, 3.0, 7.0, 991.0, 3454.0]
node: 1 TYPE: <class 'list'> [1788.0, 154.0, 0.48, 0.576, 1.2, 4.0, 5.0, 5127.0, 4747.0]
node: 6 TYPE: <class 'list'> [1781.0, 201.0, 0.288, 0.576, 2.0, 3.0, 7.0, 989.0, 955.0]
node: 7 TYPE: <class 'list'> [1780.0, 103.0, 0.288, 0.576, 2.0, 2.0, 9.0, 9697.0, 916.0]
node: 8 TYPE: <class 'list'> [1779.0, 149.0, 0.48, 0.576, 1.2, 4.0, 5.0, 990.0, 993.0]
node: 11 TYPE: <class 'list'> [1776.0, 4.0, 0.384, 0.576, 1.5, 3.0, 7.0, 987.0, 3141.0]
node: 12 TYPE: <class 'list'> [1774.0, 239.0, 0.96, 0.576, 0.6, 4.0, 5.0, 9510.0, 3200.0]
node: 3905 TYPE: <class 'list'> [2881.0, 126.0, 0.288, 0.576, 2.0, 3.0, 7.0, 6051.0, 256.0]
node: 8369 TYPE: <class 'list'> [7345.0, 178.0, 0.48, 0.576, 1.2, 5.0, 3.0, 1818.0, 10885.0]
feature_v [[1789.0, 129.0, 0.288, 0.576, 2.0, 3.0, 7.0, 991.0, 3454.0], [1788.0, 154.0, 0.48, 0.576, 1.2, 4.0, 5.0, 5127.0, 4747.0], [1781.0, 201.0, 0.288, 0.576, 2.0, 3.0, 7.0, 989.0, 955.0], [1780.0, 103.0, 0.288, 0.576, 2.0, 2.0, 9.0, 9697.0, 916.0], [1779.0, 149.0, 0.48, 0.576, 1.2, 4.0, 5.0, 990.0, 993.0], [1776.0, 4.0, 0.384, 0.576, 1.5, 3.0, 7.0, 987.0, 3141.0], [1774.0, 239.0, 0.96, 0.576, 0.6, 4.0, 5.0, 9510.0, 3200.0], [2881.0, 126.0, 0.288, 0.576, 2.0, 3.0, 7.0, 6051.0, 256.0], [7345.0, 178.0, 0.48, 0.576, 1.2, 5.0, 3.0, 1818.0, 10885.0]]



Print out train/test data:
data.train_mask tensor([1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=torch.uint8)
data.test_mask tensor([0, 0, 0, 0, 0, 0, 0, 1, 1], dtype=torch.uint8)
x: torch.Size([9, 9]) e_idx torch.Size([2, 5])
x_shape: torch.Size([9, 9])
Traceback (most recent call last):
  File "gcn_pytorch_feature.py", line 212, in <module>
    graph_learning(data)
  File "gcn_pytorch_feature.py", line 183, in graph_learning
    out = model(data)
  File "/home/hliu/.conda/envs/hliuPython/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "gcn_pytorch_feature.py", line 159, in forward
    x = self.conv1(x, edge_index)
  File "/home/hliu/.conda/envs/hliuPython/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/hliu/.conda/envs/hliuPython/lib/python3.6/site-packages/torch_geometric/nn/conv/gcn_conv.py", line 98, in forward
    self.improved, x.dtype)
  File "/home/hliu/.conda/envs/hliuPython/lib/python3.6/site-packages/torch_geometric/nn/conv/gcn_conv.py", line 77, in norm
    deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
  File "/home/hliu/.conda/envs/hliuPython/lib/python3.6/site-packages/torch_scatter/add.py", line 73, in scatter_add
    return out.scatter_add_(dim, index, src)
RuntimeError: Invalid index in scatterAdd at /opt/conda/conda-bld/pytorch_1565287124314/work/aten/src/TH/generic/THTensorEvenMoreMath.cpp:536

ptrblck · November 21, 2019, 5:43am

Could you try to get the actual values, which are used in the scatter_add method or some kind of reproducible code snippet?
I’m currently not sure, what kind of inputs you are using in which module.

Beherit666 · November 21, 2019, 6:29pm

Hi expert,
Sorry I am not sure how to get the real value feeding into scatter_add , could you give me some instruction?
What I do know is that it died on this line shown below, and the dimensions of x and e_idx is:
x: torch.Size([9, 9]) e_idx torch.Size([2, 5])