Create Graph AutoEncoder for Heterogeneous Graph

othmanelhoufi · September 1, 2022, 3:56pm

After several failed attempts to create a Heterogeneous Graph AutoEncoder It’s time to ask for help.

Here is a sample of my Dataset:

====================
Number of graphs: 560
Number of features: {'article': 769, 'user': 772}

HeteroData(
  y=[1],
  article={ x=[1, 769] },
  user={ x=[67, 772] },
  (user, tweeted, article)={ edge_index=[2, 54] },
  (user, retweeted, user)={ edge_index=[2, 4] },
  (user, liked, user)={ edge_index=[2, 11] }
)
=============================================================
Number of nodes: 68
Number of edges: 69
Average node degree: 1.01
Has isolated nodes: True
Has self-loops: False
Is undirected: False

I tried to follow these two tutorials in the PyTorch-Geometric documentation:

And here is what I wrote:

from dataset import FakeNewsDataset
from torch.utils.data import random_split

from torch_geometric.nn import TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn import GCNConv, GAE, GATConv, Linear, to_hetero
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
import torch 
from tqdm import tqdm

class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels)
        self.conv2 = GCNConv(2 * out_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)


def train(model, optimizer, train_loader, training_set):
    
    loss_all = 0
    for data in tqdm(train_loader, leave=False):
        data = data.to(device)
        optimizer.zero_grad()
        z = model.encode(data.x, data.edge_index)
        loss = model.recon_loss(z, data.pos_edge_label_index)
        loss.backward()
        optimizer.step()
        loss_all += data.num_graphs * loss.item()
    return loss_all / len(training_set)

    
def main():
    dataset = FakeNewsDataset(name='politifact', encoder='all-MiniLM-L6-v2')
    
    num_training = int(len(dataset) * 0.6)
    num_val = int(len(dataset) * 0.2)
    num_test = len(dataset) - (num_training + num_val)
    training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
    
    num_epochs = 4
    batch_size = 10
    in_channels, out_channels = dataset.num_features['article'], 128
    
    # model =  GAE(GCNEncoder(in_channels, out_channels)).to(device)
    model = GAE(GCNEncoder(in_channels, out_channels))
    model = to_hetero(model, dataset[0].metadata(), aggr='sum')
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    train_loader = DataLoader(training_set, batch_size=batch_size)
    
    for epoch in tqdm(range(num_epochs)):
        loss = train(model, optimizer, train_loader, training_set)
        
        print('Epoch: {:03d}, Loss: {:.5f}'.
          format(epoch, loss))
    print(len(training_set), len(validation_set), len(test_set))
    
if __name__ == "__main__":
    main()

FYI the error I get is: NotImplementedError: Module [GAE] is missing the required “forward” function

But when I execute the example set by PyTorch-Geometric on Github it works just fine. So I’m guessing that GAE is not working well with my Heterogeneous Graph.

MichZipp · January 23, 2023, 11:45pm

Could you resolve this issue? I ran into the same problem.

CarlosC · February 21, 2024, 9:25am

I ran also into the same problme, did you find a way to resolve the issue ?