I am trying to calculate the mutual information between each hidden layer's output and the network's input and output using the following code:
def InfoNCE(X, Y, batch_size=256, num_epochs=200, dev=torch.device("cpu"), model=None, rg=True):
    """Estimate a lower bound on the mutual information I(X; Y) via InfoNCE.

    Trains a small critic network to distinguish paired samples (x_i, y_i)
    from mismatched pairs (x_i, y_j), and returns the resulting InfoNCE
    bound averaged over the last 20 mini-batches.

    Parameters
    ----------
    X, Y : torch.Tensor
        Row-aligned sample tensors of shape (N, dx) and (N, dy). They are
        detached here, so gradients never flow back into whatever network
        produced them — this is what caused the original
        "Trying to backward through the graph a second time" RuntimeError.
    batch_size : int
        Mini-batch size; also the number of contrastive pairings per sample.
    num_epochs : int
        Number of passes over the data.
    dev : torch.device
        Device to train on.
    model : nn.Module or None
        Critic taking cat([x, y], dim=1) -> scalar score; a small MLP is
        built if None.
    rg : bool
        Deprecated and ignored. retain_graph is unnecessary once the inputs
        are detached; kept only for backward compatibility of the signature.

    Returns
    -------
    torch.Tensor
        Scalar tensor: mean estimated bound over the last 20 mini-batches.
    """
    if model is None:
        model = nn.Sequential(
            nn.Linear(X.shape[1] + Y.shape[1], 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
        )
    # Detach: treat X and Y as fixed data. Without this, loss.backward()
    # tries to propagate through the (already freed) graph of the network
    # that produced them — the source of the RuntimeError.
    X = X.detach().to(dev)
    # Tiny noise breaks exact ties between duplicate rows of Y.
    Y = Y.detach().to(dev) + torch.randn_like(Y) * 1e-4
    model = model.to(dev)
    opt = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)
    td = TensorDataset(X, Y)
    # log(K) correction term of the InfoNCE bound (K = batch_size).
    # Built on `dev` so the subtraction below never mixes devices
    # (the original `A` tensor was left on the CPU).
    log_k = torch.tensor(float(batch_size), device=dev).log()
    result = []
    for epoch in range(num_epochs):
        for x, y in DataLoader(td, batch_size, shuffle=True, drop_last=True):
            opt.zero_grad()
            # Positive-pair scores f(x_i, y_i).
            top = model(torch.cat([x, y], 1)).flatten()
            # All K*K pairings (x_i, y_j) for the contrastive denominator.
            xiyj = torch.cat(
                [x.repeat_interleave(batch_size, dim=0), y.repeat(batch_size, 1)], 1
            )
            bottom = torch.logsumexp(model(xiyj).reshape(batch_size, batch_size), 1) - log_k
            loss = -(top - bottom).mean()
            result.append(-loss.item())
            loss.backward()  # no retain_graph needed: inputs are detached
            opt.step()
    r = torch.tensor(result[-20:]).mean()
    print(r)
    return r
# Example invocation. `dataset.x` and `layer_2_log` are defined elsewhere
# (presumably the network's input tensor and recorded hidden-layer
# activations — TODO confirm against the surrounding notebook/script).
InfoNCE(dataset.x, layer_2_log[1])
I tried setting retain_graph to both True and False after reading some posts, but it still gives the runtime error either way.
Any help would be appreciated!