hello,
Im training models to prodict node entities in multi-relational graphs.
Therefore I want to train node embeddings with the RGCNConv layer.
For the node embeddings I use the nn.Embedding layer and these emebddings are randomly initialized. so therefore it would be nice to update those in the backprop.
In the forward I pass nn.Embedding.weight, edge_index, edge_type to the RGCNConv layer.
the nn.Embedding.weight have size(number of graph nodes, embedding dimension).
I specify requires_grad = true when initializing nn.emedding.
When I print the gradients of nn.embedding.weights I get non zero gradients of size(number of graph nodes, embedding dimension) which looks alright. When I look into the documentation of RGCNConv Id think that the embeddings would update automatically. But maybe I do not use the layer correctly.
Maybe using the nn.Embedding is unnecessary and use a matrix of shape(number of nodes, embedding dimension) instead.
any suggestions or improvements on the code are appreciated!
class Emb_Layers(nn.Module):
def __init__(self, num_relations: int, hidden_l: int, num_labels: int, emb_dim: int, _) -> None:
super(Emb_Layers, self).__init__()
self.rgcn1 = RGCNConv(in_channels=emb_dim, out_channels=hidden_l, num_relations=num_relations)
self.rgcn2 = RGCNConv(hidden_l, num_labels, num_relations)
nn.init.kaiming_uniform_(self.rgcn1.weight, mode='fan_in')
nn.init.kaiming_uniform_(self.rgcn2.weight, mode='fan_in')
def forward(self, training_data: Data) -> Tensor:
x = self.rgcn1(training_data.embedding.weight, training_data.edge_index, training_data.edge_type)
x = F.relu(x)
x = self.rgcn2(x, training_data.edge_index, training_data.edge_type)
x = torch.sigmoid(x)
return x
This is the training loop that I use:
def train(self, model: nn.Module, graph: Graph, sum_graph=True) -> Tuple[List, List]:
model = model.to(self.device)
training_data = graph.training_data.to(self.device)
loss_f = torch.nn.BCELoss().to(self.device)
optimizer = torch.optim.Adam(model.parameters(), lr=self.lr, weight_decay=self.weight_d)
accuracies = []
losses = []
for epoch in range(self.epochs):
if not sum_graph:
model.eval()
acc = self.evaluate(model, training_data)
accuracies.append(acc)
model.train()
optimizer.zero_grad()
out = model(training_data)
targets = training_data.y_train.to(torch.float32)
output = loss_f(out[training_data.x_train], targets)
# print(training_data.embedding.weight[0].clone())
output.backward()
optimizer.step()
# print(training_data.embedding.weight)
# print(training_data.embedding.weight.grad[0])
l = output.item()
losses.append(l)
if not sum_graph:
print(f'Accuracy on validation set = {acc}')
if epoch%10==0:
print(f'Epoch: {epoch}, Loss: {l:.4f}')
return accuracies, losses