After several failed attempts to create a Heterogeneous Graph AutoEncoder It’s time to ask for help.
Here is a sample of my Dataset:
====================
Number of graphs: 560
Number of features: {'article': 769, 'user': 772}
HeteroData(
y=[1],
article={ x=[1, 769] },
user={ x=[67, 772] },
(user, tweeted, article)={ edge_index=[2, 54] },
(user, retweeted, user)={ edge_index=[2, 4] },
(user, liked, user)={ edge_index=[2, 11] }
)
=============================================================
Number of nodes: 68
Number of edges: 69
Average node degree: 1.01
Has isolated nodes: True
Has self-loops: False
Is undirected: False
I tried to follow these two tutorials in the PyTorch-Geometric documentation:
- Heterogeneous Graph Learning — pytorch_geometric documentation
- pytorch_geometric/autoencoder.py at master · pyg-team/pytorch_geometric · GitHub
And here is what I wrote:
from dataset import FakeNewsDataset
from torch.utils.data import random_split
from torch_geometric.nn import TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn import GCNConv, GAE, GATConv, Linear, to_hetero
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
import torch
from tqdm import tqdm
class GCNEncoder(torch.nn.Module):
def __init__(self, in_channels, out_channels):
super().__init__()
self.conv1 = GCNConv(in_channels, 2 * out_channels)
self.conv2 = GCNConv(2 * out_channels, out_channels)
def forward(self, x, edge_index):
x = self.conv1(x, edge_index).relu()
return self.conv2(x, edge_index)
def train(model, optimizer, train_loader, training_set):
loss_all = 0
for data in tqdm(train_loader, leave=False):
data = data.to(device)
optimizer.zero_grad()
z = model.encode(data.x, data.edge_index)
loss = model.recon_loss(z, data.pos_edge_label_index)
loss.backward()
optimizer.step()
loss_all += data.num_graphs * loss.item()
return loss_all / len(training_set)
def main():
dataset = FakeNewsDataset(name='politifact', encoder='all-MiniLM-L6-v2')
num_training = int(len(dataset) * 0.6)
num_val = int(len(dataset) * 0.2)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
num_epochs = 4
batch_size = 10
in_channels, out_channels = dataset.num_features['article'], 128
# model = GAE(GCNEncoder(in_channels, out_channels)).to(device)
model = GAE(GCNEncoder(in_channels, out_channels))
model = to_hetero(model, dataset[0].metadata(), aggr='sum')
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
train_loader = DataLoader(training_set, batch_size=batch_size)
for epoch in tqdm(range(num_epochs)):
loss = train(model, optimizer, train_loader, training_set)
print('Epoch: {:03d}, Loss: {:.5f}'.
format(epoch, loss))
print(len(training_set), len(validation_set), len(test_set))
if __name__ == "__main__":
main()
FYI the error I get is: NotImplementedError: Module [GAE] is missing the required “forward” function
But when I execute the example set by PyTorch-Geometric on Github it works just fine. So I’m guessing that GAE is not working well with my Heterogeneous Graph.