This is my whole network, the sample VAE code from the PyTorch examples:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(784, 400)   # encoder hidden layer
        self.fc21 = nn.Linear(400, 20)   # mu
        self.fc22 = nn.Linear(400, 20)   # logvar
        self.fc3 = nn.Linear(20, 400)    # decoder hidden layer
        self.fc4 = nn.Linear(400, 784)   # reconstruction

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 784))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar
```
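For completeness, this is the loss I call `backward()` on; it is the one from the same example (reconstruction binary cross entropy plus a KL term), and I am including it here since my hook only fires during the backward pass:

```python
def loss_function(recon_x, x, mu, logvar):
    # reconstruction term + KL divergence, as in the pytorch/examples VAE
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD
```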
What I want to do is check all the gradients of `fc1`. Since it is a `Linear(784, 400)` layer, I hoped the gradient would have 128 (batch size) × 784 × 400 elements. I registered `printsize` as a backward hook on `fc1` to check:
```python
def printsize(self, grad_input, grad_output):
    print('Inside ' + self.__class__.__name__ + ' backward')
    print('grad_input : ', type(grad_input))
    print('grad_input[0] : ', type(grad_input[0]))
    print('grad_output : ', type(grad_output))
    print('grad_output[0] : ', type(grad_output[0]))
    print('grad_input size : ', grad_input[0].size())
    print('grad_output size : ', grad_output[0].size())
```
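For reference, this is roughly how I register the hook and trigger the backward pass (a minimal sketch; the random batch here is just a placeholder for my real MNIST data):

```python
model = VAE()
model.fc1.register_backward_hook(printsize)

x = torch.rand(128, 1, 28, 28)   # placeholder batch of MNIST-sized inputs in [0, 1]
recon, mu, logvar = model(x)
loss = loss_function(recon, x, mu, logvar)
loss.backward()                  # printsize fires for fc1 here
```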
It turns out that `grad_input[0]` has size `[400]`, and `grad_output[0]` has size `[128, 400]`.
How can I check the gradient with 784 × 400 elements? It seems like my understanding of `register_backward_hook` is wrong…
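To make the question concrete: the tensor I want to get at from inside the hook is the one with `fc1`'s weight shape, i.e. something like this sketch (which works outside the hook, after `backward()`):

```python
# after loss.backward(), this is the gradient I actually want to inspect
print(model.fc1.weight.grad.size())   # torch.Size([400, 784])
```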
By the way, I would also like to quote my code properly in this post; it looks very ugly right now. It would be great if someone could show me how to fix the formatting of this topic, too.