IndexError: index 45819 is out of bounds for dimension 0 with size 45011

DRISS_ELALAOUI · March 16, 2021, 7:00pm

I’ve been stuck here for a while now. help me please.
here i define a Minibatch & Neighbor Sampler for a CGMC Model as follows:

class MinibatchSampler(object):
    def __init__(self, graph, num_layers):
        self.graph = graph
        self.num_layers = num_layers
        
    def sample(self, batch):
        # Convert the list of user-item-rating triplets into a triplet of users, items, and ratings
        users, items, ratings = zip(*batch)
        users = torch.stack(users)
        items = torch.stack(items)
        ratings = torch.stack(ratings)
        
        # Create a pair graph (Step 1)
        pair_graph = dgl.heterograph(
            {('user', 'watched', 'item'): (users, items)},
            num_nodes_dict={'user': self.graph.number_of_nodes('user'), 'item': self.graph.number_of_nodes('item')})
        
        # Compact the graph (Step 2)
        pair_graph = dgl.compact_graphs(pair_graph)
        # Assign ratings to the graph
        pair_graph.edata['rating'] = ratings
        
        # Construct blocks (Step 3)
        seeds = {'user': pair_graph.nodes['user'].data[dgl.NID],
                 'item': pair_graph.nodes['item'].data[dgl.NID]}
        blocks = self.construct_blocks(seeds, (users, items))
        
        # Copy node features from original graph to the sampled block.
        # Note that for our model we only need to copy the features to the source side of the first block.
        # The node features of other blocks would be computed by our model.
        for feature_name in self.graph.nodes['user'].data.keys():
            blocks[0].srcnodes['user'].data[feature_name] = \
                self.graph.nodes['user'].data[feature_name][blocks[0].srcnodes['user'].data[dgl.NID]]
        for feature_name in self.graph.nodes['item'].data.keys():
            blocks[0].srcnodes['item'].data[feature_name] = \
                self.graph.nodes['item'].data[feature_name][blocks[0].srcnodes['item'].data[dgl.NID]]

        return pair_graph, blocks
        
    def construct_blocks(self, seeds, user_item_pairs_to_remove):
        blocks = []
        users, items = user_item_pairs_to_remove
        for i in range(self.num_layers):
            # We take all neighbors to form the sampled graph for computing the node representations on the
            # current layer.
            sampled_graph = dgl.in_subgraph(self.graph, seeds)
            # Find the sampled edge IDs for both directions
            sampled_eids = sampled_graph.edges['watched'].data[dgl.EID]
            sampled_eids_rev = sampled_graph.edges['watched-by'].data[dgl.EID]
            
            # A subtlety of rating prediction and link prediction is that, when we train on the pair of user A
            # and item 1, we don't want to actually tell the GNN that "user A has a connection to item 1".  So
            # we should remove all edges connecting the training pairs from the sampled graph.
            _, _, edges_to_remove = sampled_graph.edge_ids(users, items, etype='watched', return_uv=True)
            _, _, edges_to_remove_rev = sampled_graph.edge_ids(items, users, etype='watched-by', return_uv=True)
            
            sampled_with_edges_removed = sampled_graph
            if len(edges_to_remove) > 0:
                sampled_with_edges_removed = dgl.remove_edges(sampled_with_edges_removed, edges_to_remove, 'watched')
                sampled_eids = sampled_eids[sampled_with_edges_removed.edges['watched'].data[dgl.EID]]
            if len(edges_to_remove_rev) > 0:
                sampled_with_edges_removed = dgl.remove_edges(
                    sampled_with_edges_removed, edges_to_remove_rev, 'watched-by')
                sampled_eids_rev = sampled_eids_rev[sampled_with_edges_removed.edges['watched-by'].data[dgl.EID]]
            
            # Create a block from the sampled graph.
            block = dgl.to_block(sampled_with_edges_removed, seeds)
            blocks.insert(0, block)
            seeds = {'user': block.srcnodes['user'].data[dgl.NID],
                     'item': block.srcnodes['item'].data[dgl.NID]}
            
            # Copy the ratings to the edges of the sampled block
            block.edges['watched'].data['rating'] = \
                self.graph.edges['watched'].data['rating'][sampled_eids]
            block.edges['watched-by'].data['rating'] = \
                self.graph.edges['watched-by'].data['rating'][sampled_eids_rev]
            
        return blocks

Training Loop

import tqdm

# In this tutorial we consider 1-layer GNNs.
NUM_LAYERS = 1
BATCH_SIZE = 500
NUM_EPOCHS = 50
HIDDEN_DIMS = 8

sampler = MinibatchSampler(graph, NUM_LAYERS)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, collate_fn=sampler.sample, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, collate_fn=sampler.sample, shuffle=False)

model = GCMCRating(graph.number_of_nodes('user'), graph.number_of_nodes('item'), HIDDEN_DIMS, 5, NUM_LAYERS)
opt = torch.optim.Adam(model.parameters())

for _ in range(NUM_EPOCHS):
    model.train()
    with tqdm.tqdm(train_dataloader) as t:
        for pair_graph, blocks in t:
            user_emb, item_emb = model(blocks)
            prediction = model.compute_score(pair_graph, user_emb, item_emb)
            loss = ((prediction - pair_graph.edata['rating']) ** 2).mean()
            opt.zero_grad()
            loss.backward()
            opt.step()
            t.set_postfix({'loss': '%.4f' % loss.item()}, refresh=False)
    model.eval()
    with tqdm.tqdm(test_dataloader) as t:
        with torch.no_grad():
            predictions = []
            ratings = []
            for pair_graph, blocks in t:
                user_emb, item_emb = model(blocks)
                prediction = model.compute_score(pair_graph, user_emb, item_emb)
                predictions.append(prediction)
                ratings.append(pair_graph.edata['rating'])

            predictions = torch.cat(predictions, 0)
            ratings = torch.cat(ratings, 0)
    print('RMSE:', rmse(predictions, ratings).item())

when i run the code i get the following error

IndexError                                Traceback (most recent call last)

<ipython-input-13-13f94a041bf4> in <module>()
     19     model.train()
     20     with tqdm.tqdm(train_dataloader) as t:
---> 21         for pair_graph, blocks in t:
     22             user_emb, item_emb = model(blocks)
     23             prediction = model.compute_score(pair_graph, user_emb, item_emb)

5 frames

<ipython-input-8-f1fb9156ee92> in construct_blocks(self, seeds, user_item_pairs_to_remove)
     56             if len(edges_to_remove) > 0:
     57                 sampled_with_edges_removed = dgl.remove_edges(sampled_with_edges_removed, edges_to_remove, 'watched')
---> 58                 sampled_eids = sampled_eids[sampled_with_edges_removed.edges['watched'].data[dgl.EID]]
     59             if len(edges_to_remove_rev) > 0:
     60                 sampled_with_edges_removed = dgl.remove_edges(

IndexError: index 45819 is out of bounds for dimension 0 with size 45011