Encountering "RuntimeError: CUDA error: device-side assert triggered" while running ClusterGCN

I am getting this error while running Cluster-GCN. I do not understand why it occurs or how to resolve it. I tried setting up a conda environment with pytorch3d and torch-geometric on my local system to run this code on the CPU, but it failed multiple times because of torch-sparse.
I am trying to build a basic model inspired by https://github.com/pyg-team/pytorch_geometric/blob/master/examples/cluster_gcn_ppi.py

All suggestions and help will be appreciated. Thanks in advance!

import os
import torch
import torch.nn.functional as F
from torch_geometric.loader import ClusterData, ClusterLoader, DataLoader
from torch_geometric.nn import BatchNorm, SAGEConv
from torch_geometric.data import Data

class ClusterGCN(torch.nn.Module):
    """GraphSAGE-style network evaluated cluster-by-cluster.

    Every call to :meth:`forward` wraps the inputs in a ``Data`` object,
    partitions it with ``ClusterData``, iterates the partitions with
    ``ClusterLoader`` and runs the same layer stack on each batch.

    Args:
        in_channels: Input feature size of ``conv0``.
        hidden_channels: Feature size of all hidden layers.
        out_channels: Output feature size of ``convfinal``.
        noise_dim: Length of the noise vector concatenated to the hidden
            features before ``lin``.
        device: Device every cluster batch is moved to.
        dropout: Dropout probability applied after each hidden activation.
        n_partitions: ``num_parts`` passed to ``ClusterData``.
        batch_size: Number of partitions per ``ClusterLoader`` batch.
        num_layers: Total number of conv layers; ``num_layers - 2``
            in-between layers are applied between ``conv0`` and ``convfinal``.
        saving_dir: ``save_dir`` passed to ``ClusterData``.
        shuffle: Whether ``ClusterLoader`` shuffles the partitions.
    """

    def __init__(self, in_channels, hidden_channels, out_channels,
                 noise_dim, device, dropout, n_partitions, batch_size,
                 num_layers, saving_dir, shuffle=False):
        super().__init__()
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.n_partitions = n_partitions
        self.batch_size = batch_size
        self.saving_dir = saving_dir
        self.device = device
        self.num_layers = num_layers
        self.shuffle = shuffle
        self.conv0 = SAGEConv(in_channels, hidden_channels)
        # A single conv / batch-norm instance is reused by every in-between
        # layer in forward(), so those layers share their weights.
        self.conv = SAGEConv(hidden_channels, hidden_channels)
        self.convfinal = SAGEConv(hidden_channels, out_channels)
        self.bn = BatchNorm(hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels + noise_dim, hidden_channels)
        self.dropout = torch.nn.Dropout(p=dropout)

    def forward(self, x, adj, noise):
        """Run the network over every cluster batch of the input graph.

        Args:
            x: Node features; assumed to be ``(N, in_channels)`` as in the
                original shape comments -- TODO confirm against callers.
            adj: Sparse COO adjacency tensor; its coalesced indices are used
                as ``edge_index``.
            noise: Noise vector; assumed to be 1-D with ``noise_dim`` entries
                (the same vector is repeated for every node) -- TODO confirm.

        Returns:
            The per-batch outputs concatenated along dim 0, on ``self.device``.

        Raises:
            ValueError: If ``adj`` references a node id outside
                ``[0, x.size(0))``.
        """
        # get edge matrix and create the graph
        # for clustering and loader
        edge_index = adj.coalesce().indices()

        # Fail early with a readable message: an out-of-range node id would
        # otherwise only surface later as an out-of-bounds tensor index.
        num_nodes = x.size(0)
        if edge_index.numel() > 0:
            lowest = int(edge_index.min())
            highest = int(edge_index.max())
            if lowest < 0 or highest >= num_nodes:
                raise ValueError(
                    f"adj references node ids in [{lowest}, {highest}] "
                    f"but x only has {num_nodes} rows"
                )

        train_dataset = Data(x=x, edge_index=edge_index)

        # clustering and loading the train_data
        # NOTE(review): with save_dir set, ClusterData appears to reuse a
        # partition cached on disk; if forward() is called with graphs of
        # different sizes, a stale partition would presumably index out of
        # range -- confirm against the installed torch_geometric version.
        clustered_train_dataset = ClusterData(train_dataset,
                                              num_parts=self.n_partitions,
                                              recursive=False,
                                              save_dir=self.saving_dir)
        train_dataloader = ClusterLoader(clustered_train_dataset,
                                         batch_size=self.batch_size,
                                         shuffle=self.shuffle,
                                         num_workers=0)

        # run the model on the given object mesh
        # partitioned into subgraphs
        outputs = list()
        print("length of the train_dataloader: ", len(train_dataloader))
        for idx, data in enumerate(train_dataloader):
            print("idx:", idx)
            # Moving the batch also moves data.x and data.edge_index, so no
            # separate .to() calls are needed for them.
            data = data.to(self.device)
            x, edge_index = data.x, data.edge_index

            # initial layer
            # x: (N, in_channels)
            x = self.dropout(F.relu(self.bn(self.conv0(x, edge_index))))  # (N, hidden)

            # adding noise: repeat the noise vector once per node of this
            # batch so it can be concatenated feature-wise.
            cluster_noise = noise.to(x.device)
            cluster_noise = cluster_noise.unsqueeze(dim=0).repeat(x.shape[0], 1)  # (N, noise_dim)

            x = torch.cat((x, cluster_noise), dim=1)   # (N, noise_dim + hidden)
            x = self.lin(x)     # (N, hidden)

            # in-between layers
            for _ in range(self.num_layers - 2):
                # x: (N, hidden)
                x = self.dropout(F.relu(self.bn(self.conv(x, edge_index))))

            # final layer
            x = self.convfinal(x, edge_index)   # (N, out_channels)
            outputs.append(x)

        # NOTE(review): rows follow the loader's iteration order; since the
        # graph is partitioned first, this is presumably not the original
        # vertex order of the input x -- confirm that callers expect this.
        texture = torch.cat(outputs, dim=0).to(self.device)  # (V, channels)
        return texture
Traceback (most recent call last):
  File "/home/gauravs/github/arete/arete-realsim/gnn.py", line 213, in <module>
    val_fid = get_fid(shapenet_path, netG, val_set, device, dual, out_res, category=category,
  File "/home/gauravs/github/arete/arete-realsim/Utils/fid.py", line 178, in get_fid
    texture = model(features, adj, noise).to(device).unsqueeze(dim=0)
  File "/home/gauravs/miniconda3/envs/pytorch3d_gpu/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/gauravs/github/arete/arete-realsim/Models/clustergcn.py", line 38, in forward
    clustered_train_dataset = ClusterData(train_dataset,
  File "/home/gauravs/miniconda3/envs/pytorch3d_gpu/lib/python3.9/site-packages/torch_geometric/loader/cluster.py", line 58, in __init__
    self.data = self.__permute_data__(data, perm, adj)
  File "/home/gauravs/miniconda3/envs/pytorch3d_gpu/lib/python3.9/site-packages/torch_geometric/loader/cluster.py", line 70, in __permute_data__
    out[key] = item[node_idx]
RuntimeError: CUDA error: device-side assert triggered

Most likely the indexing operation fails.
Rerun your code with `CUDA_LAUNCH_BLOCKING=1 python script.py args`, which should point to the failing operation. If it is still the indexing op, make sure that node_idx has a valid shape and contains valid index values for item.