What is the difference between the `DGL` and `PyG` implementations of a Graph Convolutional Network?

I'm new to PyTorch Geometric and geometric deep learning. I am going through the graph convolutional network (GCN) implementations in both PyTorch Geometric and the Deep Graph Library (DGL), but the two implementations look quite different to me.

DGL-style implementation (plain PyTorch, from GitHub - tkipf/pygcn at 1600b5b748b3976413d1e307540ccc62605b4d6d):

import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class GraphConvLayer(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvLayer, self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdev = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdev, stdev)
        if self.bias is not None:
            self.bias.data.uniform_(-stdev, stdev)
        
    def forward(self, x, adj):
        support = torch.mm(x, self.weight)   # X @ W
        output = torch.spmm(adj, support)    # sparse adjacency @ (X @ W)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
                + str(self.in_features) + ' -> ' \
                + str(self.out_features) + ') '
 
                           
class GCN(nn.Module):
    def __init__(self, n_features, n_hidden, n_classes, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvLayer(n_features, n_hidden)
        self.gc2 = GraphConvLayer(n_hidden, n_classes)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)
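
As far as I can tell, this layer never computes node degrees itself; the normalization appears to be baked into `adj` during preprocessing, before it is ever passed to `forward`. Below is a minimal sketch of how I assume the adjacency is prepared (the symmetric D^-1/2 (A + I) D^-1/2 normalization from the GCN paper; the pygcn repo's own utils may use a slightly different row-wise normalization, and the helper names below are mine, just for illustration):

import numpy as np
import scipy.sparse as sp
import torch


def normalize_adj(raw_adj):
    # Symmetric normalization A_hat = D^-1/2 (A + I) D^-1/2 (a sketch, not taken verbatim from the repo).
    adj = sp.coo_matrix(raw_adj) + sp.eye(raw_adj.shape[0])   # add self-loops
    deg = np.asarray(adj.sum(1)).flatten()                    # node degrees of A + I
    d_inv_sqrt = np.power(deg, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0                    # guard against isolated nodes
    d_mat = sp.diags(d_inv_sqrt)
    return d_mat.dot(adj).dot(d_mat).tocoo()


def to_torch_sparse(coo):
    # Convert a scipy COO matrix into the sparse tensor that torch.spmm expects.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data.astype(np.float32))
    return torch.sparse_coo_tensor(indices, values, coo.shape)


# Hypothetical usage (Cora-like sizes, just for illustration):
# adj_norm = to_torch_sparse(normalize_adj(raw_adj))          # raw_adj: scipy/numpy adjacency matrix
# model = GCN(n_features=1433, n_hidden=16, n_classes=7, dropout=0.5)
# out = model(features, adj_norm)                             # features: dense [N, n_features] tensor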

PyTorch Geometric implementation (Creating Message Passing Networks — pytorch_geometric 1.7.2 documentation):


import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree


class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(GCNConv, self).__init__(aggr='add')  # "Add" aggregation
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # Step 1: Add self-loops
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Step 2: Multiply with weights
        x = self.lin(x)

        # Step 3: Calculate the normalization
        row, col = edge_index
        deg = degree(row, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4: Propagate the embeddings to the next layer
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x,
                              norm=norm)

    def message(self, x_j, norm):
        # Normalize node features.
        return norm.view(-1, 1) * x_j


class Net(torch.nn.Module):
    def __init__(self, dataset):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
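
For completeness, here is a minimal training sketch in the style of the PyG introductory example (assuming the Planetoid Cora dataset; the learning rate and weight decay are the values used in the docs):

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')   # downloads Cora on first use
data = dataset[0]

model = Net(dataset)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()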

In the PyG implementation, terms like the node degree and its -0.5 power show up, which I can see come from the GCN equation. But I did not understand what `self.propagate` does, and why such normalization terms do not appear in the DGL/plain-PyTorch implementation.
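
For reference, the layer-wise propagation rule from Kipf & Welling that I believe both snippets implement is

$$H^{(l+1)} = \sigma\!\left(\tilde{D}^{-1/2}\,\tilde{A}\,\tilde{D}^{-1/2}\,H^{(l)}W^{(l)}\right), \qquad \tilde{A} = A + I_N, \quad \tilde{D}_{ii} = \sum_j \tilde{A}_{ij},$$

where, if I understand correctly, the $\tilde{D}^{-1/2}$ factors are what the `degree(...)` and `pow(-0.5)` calls compute in the PyG code.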

What am I missing? What is the difference between the two implementations?