I am trying to get the gradients at the first layer of my network and was hoping to get some help with it. I am aware that I can use hooks
to accomplish this (see https://github.com/utkuozbulak/pytorch-cnn-visualizations/blob/master/src/guided_backprop.py). But what if my model contains an LSTM? How can I get the gradients for each input provided to the LSTM, for example with the architecture below? Are there any examples online? I have been looking but have yet to find any.
# ======= CNN LSTM MODEL =========== #
class CNNLSTM(nn.Module):
def __init__(self, embedding_dim=64, h_dim=32):
    """Assemble the frozen CNN backbone, feature embedder, LSTM and classifier.

    Args:
        embedding_dim: output size of the linear feature embedder and
            input size of the LSTM.
        h_dim: hidden size of the LSTM and input size of the classifier.
    """
    super().__init__()

    # Hyper-parameters kept on the instance for later use.
    self.embedding_dim = embedding_dim
    self.h_dim = h_dim

    # --- CNN feature extractor ---
    # Keep only the first child of the pretrained VGG16 and re-wrap its
    # layers in a fresh Sequential.
    # Original note on this line: "5, 10, 18, 25, 31" -- presumably
    # candidate layer cut-off indices; TODO confirm.
    backbone = models.vgg16(pretrained=True)
    first_child = list(backbone.children())[0]
    self.model = nn.Sequential(*first_child)

    # Freeze the extractor so its weights receive no gradient updates.
    for weight in self.model.parameters():
        weight.requires_grad = False

    # --- CNN feature embedder ---
    # NOTE(review): 1536 is assumed to be the flattened CNN feature size
    # produced in forward() -- confirm against the input resolution.
    self.feature_embedder = nn.Linear(1536, embedding_dim)

    # --- LSTM ---
    # Single layer, sequence-first input layout (batch_first=False).
    self.lstm = nn.LSTM(
        input_size=embedding_dim,
        hidden_size=h_dim,
        num_layers=1,
        dropout=0.0,
        batch_first=False,
    )

    # --- MLP classifier ---
    # Cross entropy loss is used, so the output for each sample must have
    # the same size as the total number of classes (2 here).
    layer_sizes = [h_dim, 128, 2]
    activations = ['relu', 'sigmoid']
    self.classifier = make_mlp(
        layer_sizes,
        activations,
        batch_norm=False,
        dropout=0.0,
    )
def init_hidden(self, batch):
    """Return a pair of zero tensors shaped (1, batch, self.h_dim).

    Presumably used as the initial (hidden, cell) state of the LSTM --
    confirm against forward().

    Args:
        batch: size of the second dimension of each returned tensor.

    Returns:
        A 2-tuple of distinct zero-filled tensors, each of shape
        (1, batch, self.h_dim).
    """
    state_shape = (1, batch, self.h_dim)
    # Two separate allocations so the tensors never share storage.
    first_state = torch.zeros(state_shape)
    second_state = torch.zeros(state_shape)
    return (first_state, second_state)
def forward(self, images_pedestrian_all):