I am trying to implement the Relation Proposal Network in here. In a nutshell, given input P whose dimension is n x 151, where n is the number of nodes and 151 is features for a node, we want the network to predict adjacency matrix A in n x n dimensions.
My current approach is to predict E storing edges with the node indices and then convert the edges into adjacency matrix A_hat and compare against the ground truth A. This method is a bit odd to me. Can the gradient actually pass through (see edge2d_to_adj_matrix
)? I checked the weights of the network; they do not change even after I step on the optimizer. What’s the proper way to implement this?
Snippets of my code are attached below:
def edge2d_to_adj_matrix(E, n):
assert E.shape[1] == 2
A = torch.zeros((n,n))
A[E[:, 0], E[:, 1]] = 1.0
return A
class Projection(nn.Module):
def __init__(self):
super(Projection, self).__init__()
dim_mid = 64
# Class dimension: 151
self.linear1 = nn.Linear(151, dim_mid)
self.act1 = torch.nn.Sigmoid()
self.linear2 = nn.Linear(dim_mid, 16)
self.act2 = torch.nn.Sigmoid()
def forward(self, x):
# x: n x 151
x = self.linear1(x)
x = self.act1(x)
x = self.linear2(x)
x = self.act2(x)
return x
class RePN(nn.Module):
def __init__(self, k, m):
super(RePN, self).__init__()
self.proj1 = Projection()
self.proj2 = Projection()
self.k = k # num of edges to compute S
self.m = m # num of edges to remain
assert m <= k
def forward(self, P):
# P: n x 151, n is num of objects
S = self._score(P, self.proj1, self.proj2) # n x n scoring matrix
n = S.size(0)
_, top_k_index = S.view(-1).topk(self.k+n) # pick the most related k pairs among the n^2 pairs
top_k_index = top_k_index[n:] # do not pair with itself
index_i, index_j = self._get_2d_index(top_k_index, n) # index_i[k] is pair with index_j[k]
assert type(index_i) == type(torch.LongTensor([]))
E = torch.t(torch.stack((index_i, index_j))) # size: k x 2
assert E.size(0) == self.k
assert E.size(1) == 2
return E
def _get_2d_index(self, index1d, n):
index_i = index1d/n # row id
index_j = index1d%n # col id
return (index_i, index_j) # each pair is an edge from i to j
for i, data in enumerate(dataloader, 1):
P_input, E_target = data
P_input = Variable(P_input.squeeze(), requires_grad=True).to(device)
E_target = Variable(E_target.squeeze()).to(device)
optimizer.zero_grad()
E_hat = repn(P_input)
n = P_input.size(0)
E_hat = edge2d_to_adj_matrix(E_hat, n) ####??? can gradient go through?
loss = criterion(E_hat, E_target)
loss.backward()
running_loss += loss.item()
if i % print_every == 0:
print("Loss = %.6f" %(running_loss/print_every))
running_loss = 0.0
optimizer.step()