I’ve tried to build a GCN to train my own data which are nodes with only one feature on each node.
However I encountered a problem, how can I define attributes “train_mask”, “test_mask”, “val_mask” like what they have in the built-in dataset?
My code:
#########################################################
### Headers.
#########################################################
import sys
import re
import random
import torch
import torch.nn.functional as F
from fnmatch import fnmatch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
#########################################################
### Variables Declaration.
#########################################################
graphFileIn = "graph_001.txt" #sys.argv[1]
mapFileIn = "map_001.txt" #sys.argv[2]
run_name = ""
lr = 1e-3
epochs = 100
weight_decay = 5e-4
#########################################################
### Read in netlist graph.
#########################################################
def read_netlist_graph(graphFileIn, mapFileIn):
edge_v1 = []
edge_v2 = []
node_number = 0
for line in open(graphFileIn, 'r'):
data = line.split()
node1 = int(data[0])
node2 = int(data[1])
weight = data[2]
edge_v1.append( node1 )
edge_v2.append( node2 )
print("v1:", len(edge_v1), "v2:",len(edge_v2))
for line in open(mapFileIn, 'r'):
node_number += 1
print("Node no.",node_number)
print("Edge1 len", len(edge_v1), "Edge2_len",len(edge_v2))
return edge_v1, edge_v2, node_number
#########################################################
### Build netlist graph in PyTorch form.
#########################################################
def build_netlist_graph(edge_v1, edge_v2, node_number):
edge_index = torch.tensor([edge_v1,
edge_v2],
dtype = torch.long)
print("edge_idx:", edge_index)
i = 1
feature_v = []
for i in range(node_number):
feature_v.append(random.sample(range(0, 2),1))
print("Feature length:", len(feature_v))
x = torch.tensor(feature_v,
dtype = torch.float)
data = Data(x = x,
edge_index = edge_index,
#edge_index = edge_index.t().contiguous(),
num_classes = 2,
)
print("Data keys:",data.keys)
print("data['x']:", data['x'])
print("num_edges:",data.num_edges)
print("num_nodes:",data.num_nodes)
print("directed:", data.is_directed())
print("data.num_node_features:",data.num_node_features)
return data
#########################################################
### Learning on Graphs
#########################################################
class Net(torch.nn.Module):
### Constructor
def __init__(self):
### super is used to inherit from the torch.nn.module.
super(Net, self).__init__()
#class GCNConv(in_channels,
# out_channels, improved=False,
# cached=False, bias=True, **kwargs)
self.conv1 = GCNConv(data.num_node_features, 16)
self.conv2 = GCNConv(16, data.num_classes)
def forward(self, data):
x, edge_index = data.x, data.edge_index
print("x:",x.size(), "e_idx", edge_index.size())
print("x_shape:", x.shape)
x = self.conv1(x, edge_index)
x = F.relu(x)
x = F.dropout(x, training=self.training)
x = self.conv2(x, edge_index)
return F.log_softmax(x, dim=1)
def graph_learning(data):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = data #dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(),
lr = lr,
weight_decay = weight_decay)
model.train()
for epoch in range(epochs):
optimizer.zero_grad()
out = model(data)
loss = F.nll_loss(out[data.train_mask],
data.y[data.train_mask])
loss.backward()
optimizer.step()
model.eval()
_, pred = model(data).max(dim = 1)
correct = float (
pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))
#########################################################
### Main function.
#########################################################
if __name__ == '__main__':
(edge_v1,
edge_v2,
node_number) = read_netlist_graph(graphFileIn, mapFileIn)
data = build_netlist_graph(edge_v1, edge_v2, node_number)
graph_learning(data)
Error messages:
v1: 127378 v2: 127378
Node no. 11354
Edge1 len 127378 Edge2_len 127378
edge_idx: tensor([[ 0, 0, 1, ..., 11353, 4724, 2357],
[ 1, 4, 4, ..., 2342, 2342, 512]])
Feature length: 11354
Data keys: ['x', 'edge_index', 'num_classes']
data['x']: tensor([[0.],
[1.],
[1.],
...,
[1.],
[1.],
[0.]])
num_edges: 127378
num_nodes: 11354
directed: True
data.num_node_features: 1
x: torch.Size([11354, 1]) e_idx torch.Size([2, 127378])
x_shape: torch.Size([11354, 1])
Traceback (most recent call last):
File "gcn_pytorch.py", line 164, in <module>
graph_learning(data)
File "gcn_pytorch.py", line 140, in graph_learning
loss = F.nll_loss(out[data.train_mask],
AttributeError: 'Data' object has no attribute 'train_mask'