I am having a hard time debugging this issue. Roughly 100-150 iterations into training, I consistently get a NaN, which anomaly detection reports as coming from the rsample of the Normal distribution shown below. I don't understand how the NaN could originate there rather than in the mu/std computation itself. Why does it not show up during selection_net's forward pass?
What makes this especially aggravating is that it only happens 100-150 iterations in, which means everything runs fine up to that point.
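(By anomaly detection I mean PyTorch's built-in anomaly mode, switched on at the top of training:)

import torch

torch.autograd.set_detect_anomaly(True)  # makes backward errors include the traceback of the forward op that produced the failing gradient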
Synopsis of failing code:
from torch.distributions import Normal

selection_net = PolicySelectionNetwork(...)  # the network defined below
mu, std = selection_net(batch_data, node_sample, category_sample)
pi_distribution = Normal(mu, std)
action = pi_distribution.rsample()  # this is the line anomaly detection flags
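One way to rule out bad forward values would be an explicit finiteness check on mu and std right before that rsample (assert_finite is a hypothetical helper of mine, not library code):

def assert_finite(name, t):
    # fail fast at the first NaN/Inf instead of waiting for backward to blow up
    if not torch.isfinite(t).all():
        raise RuntimeError(f"{name} contains NaN/Inf")

assert_finite("mu", mu)
assert_finite("std", std)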
Error I get:
/home/me/miniconda3/envs/myenv/lib/python3.7/site-packages/torch/autograd/__init__.py:132: UserWarning: Error detected in AddBackward0. Traceback of forward call that caused the error:
  File "main.py", line 459, in <module>
    run(opts)
  File "main.py", line 138, in run
    return train(opts, miner)
  File "main.py", line 223, in train
    log_ = miner.train()
  File "/home/me/Code/mydir/rl/pg.py", line 34, in train
    loss, log = self.compute_loss()
  File "/home/me/Code/mydir/rl/pg.py", line 64, in compute_loss
    obs, masks, acts)
  File "/home/me/Code/mydir/rl/policies/base.py", line 793, in get_probs_values_entropies
    pi_distribution)
  File "/home/me/Code/mydir/rl/policies/base.py", line 702, in get_squashed_action_prob_entropy
    action = distribution.rsample()
  File "/home/me/miniconda3/envs/myenv/lib/python3.7/site-packages/torch/distributions/normal.py", line 68, in rsample
    return self.loc + eps * self.scale
(Triggered internally at /opt/conda/conda-bld/pytorch_1607370156314/work/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
  allow_unreachable=True)  # allow_unreachable flag
Traceback (most recent call last):
  File "main.py", line 459, in <module>
    run(opts)
  File "main.py", line 138, in run
    return train(opts, miner)
  File "main.py", line 223, in train
    log_ = miner.train()
  File "/home/me/Code/mydir/rl/pg.py", line 35, in train
    loss.backward()
  File "/home/me/miniconda3/envs/myenv/lib/python3.7/site-packages/comet_ml/monkey_patching.py", line 293, in wrapper
    return_value = original(*args, **kwargs)
  File "/home/me/miniconda3/envs/myenv/lib/python3.7/site-packages/torch/tensor.py", line 221, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/me/miniconda3/envs/myenv/lib/python3.7/site-packages/torch/autograd/__init__.py", line 132, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: Function 'AddBackward0' returned nan values in its 0th output.
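Since the RuntimeError is raised in backward rather than forward, a gradient hook on mu and std would show whether the gradient arriving at them is already non-finite; a sketch using Tensor.register_hook (grad_guard and its naming are mine):

def grad_guard(name):
    def hook(grad):
        # runs during backward with the gradient w.r.t. the hooked tensor
        if not torch.isfinite(grad).all():
            raise RuntimeError(f"non-finite gradient flowing into {name}")
    return hook

mu.register_hook(grad_guard("mu"))
std.register_hook(grad_guard("std"))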
Network that has the issue:
class PolicySelectionNetwork(nn.Module):
    """Continuous policy for selection."""

    def __init__(self, num_node_features, hidden_size, embedding_dim,
                 num_node_out, num_choice_out, num_layers):
        super().__init__()
        self.node_embeddings = nn.Embedding(num_node_out, embedding_dim)
        self.choice_embeddings = nn.Embedding(num_choice_out, embedding_dim)
        # GCN stack: an input layer plus num_layers hidden layers
        layers = [GCNConv(num_node_features, hidden_size)]
        for _ in range(num_layers):
            layers.append(GCNConv(hidden_size, hidden_size))
        self.layers = nn.ModuleList(layers)
        self.conversion_layer = nn.Linear(hidden_size, embedding_dim)
        layers = [nn.Linear(embedding_dim, hidden_size)]
        self.combined_layers = nn.ModuleList(layers)
        self.mu_layer = nn.Linear(hidden_size, 1)
        self.log_std_layer = nn.Linear(hidden_size, 1)

    def forward(self, observations, nodes, choices):
        """Compute the policy and the logprobs.

        Args:
            observations: A torch_geometric Batch.
            nodes: A batch of one-hot node actions.
            choices: A batch of one-hot choice actions.
        """
        px, edge_index = observations.x, observations.edge_index
        # message passing with ReLU between layers; no activation after the last conv
        for layer in self.layers[:-1]:
            px = layer(px, edge_index)
            px = F.relu(px)
        px = self.layers[-1](px, edge_index)
        # pool node features to one vector per graph, then project to embedding_dim
        global_px = global_mean_pool(px, observations.batch)
        global_px = self.conversion_layer(global_px)
        nodes = self.node_embeddings(nodes)
        choices = self.choice_embeddings(choices)
        # combine the graph context with the embedded node/choice actions
        policy = global_px + nodes + choices
        for layer in self.combined_layers:
            policy = layer(policy)
            policy = F.relu(policy)
        mu = self.mu_layer(policy)
        # clamp log_std so std = exp(log_std) stays in a bounded range
        log_std = self.log_std_layer(policy)
        log_std = torch.clamp(log_std, LOG_STD_MIN, LOG_STD_MAX)
        std = torch.exp(log_std)
        return mu, std
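To localize where things first go non-finite inside this network, forward hooks on every submodule could flag the first bad activation; a sketch (check_output and the registration loop are mine, constructor args elided as above):

def check_output(name):
    def hook(module, inputs, output):
        # flag the first module whose forward output contains NaN/Inf
        out = output[0] if isinstance(output, tuple) else output
        if torch.is_tensor(out) and not torch.isfinite(out).all():
            raise RuntimeError(f"non-finite forward output from {name}")
    return hook

net = PolicySelectionNetwork(...)
for name, module in net.named_modules():
    if name:  # skip the root module itself
        module.register_forward_hook(check_output(name))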