Gradient Flow on Graph Networks

I am trying to implement the Relation Proposal Network in here. In a nutshell, given input P whose dimension is n x 151, where n is the number of nodes and 151 is features for a node, we want the network to predict adjacency matrix A in n x n dimensions.

My current approach is to predict E storing edges with the node indices and then convert the edges into adjacency matrix A_hat and compare against the ground truth A. This method is a bit odd to me. Can the gradient actually pass through (see edge2d_to_adj_matrix)? I checked the weights of the network; they do not change even after I step on the optimizer. What’s the proper way to implement this?

Snippets of my code are attached below:

def edge2d_to_adj_matrix(E, n):
    assert E.shape[1] == 2
    A = torch.zeros((n,n))
    A[E[:, 0], E[:, 1]] = 1.0
    return A

class Projection(nn.Module):
    def __init__(self):
        super(Projection, self).__init__()
        dim_mid = 64
        # Class dimension: 151
        self.linear1 = nn.Linear(151, dim_mid)
        self.act1 = torch.nn.Sigmoid()
        self.linear2 = nn.Linear(dim_mid, 16)
        self.act2 = torch.nn.Sigmoid()

    def forward(self, x):
        # x: n x 151 
        x = self.linear1(x)
        x = self.act1(x)
        x = self.linear2(x)
        x = self.act2(x)
        return x
class RePN(nn.Module):
    def __init__(self, k, m):
        super(RePN, self).__init__()
        self.proj1 = Projection()
        self.proj2 = Projection()
        self.k = k # num of edges to compute S
        self.m = m # num of edges to remain
        assert m <= k

    def forward(self, P):

        # P: n x 151, n is num of objects
        S = self._score(P, self.proj1, self.proj2) # n x n scoring matrix
        n = S.size(0)
        _, top_k_index = S.view(-1).topk(self.k+n) # pick the most related k pairs among the n^2 pairs
        top_k_index = top_k_index[n:] # do not pair with itself
        index_i, index_j = self._get_2d_index(top_k_index, n) # index_i[k] is pair with index_j[k]
        assert type(index_i) == type(torch.LongTensor([]))
        E = torch.t(torch.stack((index_i, index_j))) # size: k x 2
        assert E.size(0) == self.k
        assert E.size(1) == 2
        return E
    def _get_2d_index(self, index1d, n): 
        index_i = index1d/n # row id
        index_j = index1d%n # col id
        return (index_i, index_j) # each pair is an edge from i to j

for i, data in enumerate(dataloader, 1): 
    P_input, E_target = data

    P_input = Variable(P_input.squeeze(), requires_grad=True).to(device)
    E_target = Variable(E_target.squeeze()).to(device)

    E_hat = repn(P_input)
    n = P_input.size(0)
    E_hat = edge2d_to_adj_matrix(E_hat, n) ####??? can gradient go through?
    loss = criterion(E_hat, E_target)
    running_loss += loss.item()
    if i % print_every == 0: 
        print("Loss = %.6f" %(running_loss/print_every))
        running_loss = 0.0