Hi,
Suppose I have a network with, say, 4 layers. How do I get the gradient that is passed back, i.e. dL/dx at layer 3 and layer 2?
Currently I can only take gradients with respect to the weights and biases, not the intermediate x.
You need to use hooks.
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(128, 64)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(64, 64)
        self.relu2 = nn.ReLU()
        self.register_hook = False
        self.hook = {'layer1': [], 'relu1': [], 'layer2': [], 'relu2': []}

    def forward(self, x):
        out_layer1 = self.layer1(x)
        out_relu1 = self.relu1(out_layer1)
        out_layer2 = self.layer2(out_relu1)
        out_relu2 = self.relu2(out_layer2)
        if self.register_hook:
            # tensor hooks receive dL/d(tensor) during the backward pass
            out_layer1.register_hook(lambda grad: self.hook_fn(grad=grad, name='layer1'))
            out_layer1.retain_grad()
            out_relu1.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu1'))
            out_relu1.retain_grad()
            out_layer2.register_hook(lambda grad: self.hook_fn(grad=grad, name='layer2'))
            out_layer2.retain_grad()
            out_relu2.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu2'))
            out_relu2.retain_grad()
        return out_relu2

    def hook_fn(self, grad, name):
        self.hook[name].append(grad)

    def reset_hook(self):
        self.hook = {'layer1': [], 'relu1': [], 'layer2': [], 'relu2': []}
Do I need to set self.register_hook = True somewhere?
This is my current network:
import numpy as np
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(8192, 10)
        np.random.seed(1)
        self.fc1.weight.data = torch.FloatTensor(1e-3 * np.random.randn(8192, 10)).T
        np.random.seed(2)
        self.fc1.bias.data = torch.FloatTensor(np.zeros(10))
        np.random.seed(3)
        self.conv1.weight.data = torch.FloatTensor(1e-3 * np.random.randn(32, 3, 5, 5))
        self.conv1.bias.data = torch.FloatTensor(np.zeros(32))
        self.register_hook = False
        self.hook = {'conv1': [], 'relu': [], 'pool': [], 'fc1': []}

    def hook_fn(self, grad, name):
        self.hook[name].append(grad)

    def reset_hook(self):
        self.hook = {'conv1': [], 'relu': [], 'pool': [], 'fc1': []}

    def forward(self, x):
        step1 = self.conv1(x)
        step2 = self.relu(step1)
        step3 = self.pool(step2)
        step3_flatten = step3.reshape(len(x), -1)
        step4 = self.fc1(step3_flatten)
        if self.register_hook:
            # the hook names must match the keys of self.hook, otherwise hook_fn raises a KeyError
            step1.register_hook(lambda grad: self.hook_fn(grad=grad, name='conv1'))
            step1.retain_grad()
            step2.register_hook(lambda grad: self.hook_fn(grad=grad, name='relu'))
            step2.retain_grad()
            step3.register_hook(lambda grad: self.hook_fn(grad=grad, name='pool'))
            step3.retain_grad()
            step4.register_hook(lambda grad: self.hook_fn(grad=grad, name='fc1'))
            step4.retain_grad()
        return step4
The reason for having register_hook is so you can stop hooking when you don't need it, but you may set it to True by default and access the gradients through net.hook.
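For example, a rough sketch of that usage (using the Net class and imports from the snippet above; the 32x32 input size and dummy target are just assumptions for illustration):

net = Net()
net.register_hook = True                   # turn the hooks on (or make True the default)
x = torch.randn(8, 3, 32, 32)              # dummy batch; 32x32 inputs give the 8192-dim flatten
target = torch.randint(0, 10, (8,))        # dummy labels
out = net(x)                               # hooks get registered on the intermediate outputs here
loss = nn.CrossEntropyLoss()(out, target)
loss.backward()                            # hook_fn fires here and appends to net.hook
net.register_hook = False                  # stop hooking on later passes if you don't need it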
Thanks. I will set self.register_hook = True.
Can I access the gradients with net.hook, or does it require other syntax?
net.hook is a dictionary, so just access the gradients of a layer by its key, e.g. net.hook['conv1'],
which is a list of the gradients collected after each forward/backward pass.
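For instance, something like this (assuming net already ran at least one forward/backward pass with net.register_hook = True):

grads_conv1 = net.hook['conv1']    # list with one tensor per backward pass
d_conv1 = grads_conv1[-1]          # dL/d(conv1 output) from the most recent pass
print(len(grads_conv1), d_conv1.shape)
net.reset_hook()                   # clear the stored gradients when you're done with them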
Hi Kevy,
Is the dictionary storing the gradient values one step ahead? It looks a little out of sync when I compare against my numpy implementation.
It stores the gradients after each backward pass, so it shouldn't be out of sync; the hooks are registered during each forward pass and fire on backward. You can double-check by setting the parameters to zero; you should get zero grads for that iteration.
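Something along these lines (a rough sketch with the Net above, assuming 32x32 inputs; note that only the gradients upstream of the zeroed layer are expected to be zero):

net = Net()
net.register_hook = True
net.fc1.weight.data.zero_()                   # zero out the last layer's weights
x = torch.randn(4, 3, 32, 32)
target = torch.randint(0, 10, (4,))
loss = nn.CrossEntropyLoss()(net(x), target)
loss.backward()
# dL/d(pool output) = dL/d(fc1 output) @ fc1.weight, so it should be exactly zero here,
# while net.hook['fc1'] (the gradient w.r.t. the logits themselves) stays non-zero
print(net.hook['pool'][-1].abs().max())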