Hi guys,

I want to plot a histogram of the gradients at each layer in order to study vanishing/exploding gradients. However, when I try to look up the gradient values at each layer with `model.fc[i].weight.grad`, it returns `None`.

Here is my architecture:

```
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, nb_layers, nb_units, input_dim):
        """
        Parameters
        ----------
        nb_layers : number of layers
        nb_units : number of hidden units per layer (assumed to be the same for every hidden layer)
        input_dim : number of input features
        """
        super(MLP, self).__init__()
        fc = nn.ModuleList()
        self.nb_layers = nb_layers
        for i in range(nb_layers):
            if i == 0:
                fc.append(nn.Linear(input_dim, nb_units))
            elif i == nb_layers - 1:
                fc.append(nn.Linear(nb_units, 1))
            else:
                fc.append(nn.Linear(nb_units, nb_units))
        self.fc = fc
        self.relu = nn.ReLU()

    def forward(self, x):
        for i in range(self.nb_layers):
            x = self.fc[i](x)
            if i != self.nb_layers - 1:  # no activation after the output layer
                x = self.relu(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nb_layers = 2
nb_units = 20
input_dim = 2  # number of input features, here only 2
model = MLP(nb_layers=nb_layers, nb_units=nb_units, input_dim=input_dim)
model.to(device)
```
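
Concretely, this is the kind of loop I run to inspect the gradients (a minimal sketch; it prints `None` for every layer):

```
# read the gradient of each Linear layer's weight matrix
for i in range(nb_layers):
    print(f"layer {i}:", model.fc[i].weight.grad)  # prints None
```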

Here is my training function:

```
def nn_training(model, epochs, learning_rate=0.001):
    """Trains the neural network and returns the training and validation errors.

    Parameters
    ----------
    model : nn.Module to be trained
    epochs : number of epochs of training

    Returns
    -------
    train_losses, valid_losses : two lists containing the corresponding losses
    """
    error = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    train_losses, valid_losses = [], []
    for e in range(epochs):
        running_loss = 0
        for i, (variable, target) in enumerate(train_loader):
            variable, target = variable.to(device), target.to(device)
            target = target.unsqueeze(1)
            optimizer.zero_grad()
            output = model(variable)
            loss = error(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # validation pass at the end of each epoch
        valid_loss = 0
        # turn off gradients for validation, saves memory and computation
        with torch.no_grad():
            # set model to evaluation mode
            model.eval()
            for i, (variable, target) in enumerate(valid_loader):
                variable, target = variable.to(device), target.to(device)
                target = target.unsqueeze(1)
                output = model(variable)
                valid_loss += error(output, target).item()
        # set model back to train mode
        model.train()

        train_losses.append(running_loss / len(train_loader))
        valid_losses.append(valid_loss / len(valid_loader))
        if (e + 1) % 5 == 0:
            print("Epoch: {}/{}.. ".format(e + 1, epochs),
                  "Training Loss: {:.10f}.. ".format(running_loss / len(train_loader)),
                  "Validation Loss: {:.10f}.. ".format(valid_loss / len(valid_loader)))
        scheduler.step()
    return train_losses, valid_losses
```
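
What I ultimately want is to snapshot the gradients right after `loss.backward()` inside this loop, something along these lines (just a sketch; `record_gradients` and `grad_history` are hypothetical names, not part of my actual code):

```
grad_history = [[] for _ in range(nb_layers)]

def record_gradients(model, grad_history):
    # to be called right after loss.backward(); stores a detached CPU copy
    # of each layer's weight gradient so it is not tied to the autograd graph
    for i, layer in enumerate(model.fc):
        if layer.weight.grad is not None:
            grad_history[i].append(layer.weight.grad.detach().cpu().clone())
```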

My inputs come from a DataFrame and are loaded into a DataLoader like this:

```
import torch.utils.data as data_utils

batch_size = 256
# .float() casts to float32 to match the default dtype of the nn.Linear weights
# (DataFrame values usually come out as float64)
train = data_utils.TensorDataset(torch.tensor(train_df_features.values).float(),
                                 torch.tensor(train_df_target.values).float())
train_loader = data_utils.DataLoader(train, batch_size=batch_size, shuffle=True)
valid = data_utils.TensorDataset(torch.tensor(valid_df_features.values).float(),
                                 torch.tensor(valid_df_target.values).float())
valid_loader = data_utils.DataLoader(valid, batch_size=64)
```
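
Finally, this is roughly how I put everything together (the epoch count is arbitrary, and the plotting part is only a sketch of the histograms I am aiming for):

```
import matplotlib.pyplot as plt

train_losses, valid_losses = nn_training(model, epochs=50)

# intended result: one gradient histogram per layer
for i, layer in enumerate(model.fc):
    grad = layer.weight.grad
    if grad is None:
        print(f"layer {i}: grad is None")  # what I actually get
        continue
    plt.hist(grad.detach().cpu().numpy().ravel(), bins=50)
    plt.title(f"Gradient histogram, layer {i}")
    plt.show()
```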

I really hope you can help me, thanks in advance!