Hi Community,
My model (linear layers with ReLU activations in between, no redundant softmax, weights initialized with xavier_uniform_) has two problems:
1. The loss is sometimes NaN throughout training because the predictions contain inf values; I don't see how this can happen.
2. When the loss is not NaN (I don't fix the random seed, which is fortunately what exposed these problems), its value never changes, down to many decimal places. This seems to be because the gradients returned by autograd are 0.
The input is a 1D array of a few hundred features; the output is a yes/no decision (binary classification).
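Functionally, the network I have in mind is equivalent to something like the nn.Sequential sketch below (the sizes are placeholders, not my real configuration; my actual code builds raw parameter tensors by hand so they can be swapped during meta-learning, as shown further down):
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Illustrative sizes only; my real hidden dimensions differ.
dim_input, dim_hidden, dim_output = 300, [128, 64], 2
dims = [dim_input] + dim_hidden + [dim_output]

layers = []
for i in range(len(dims) - 1):
    linear = nn.Linear(dims[i], dims[i + 1])
    nn.init.xavier_uniform_(linear.weight)
    nn.init.zeros_(linear.bias)
    layers.append(linear)
    if i < len(dims) - 2:          # ReLU between layers, none after the output
        layers.append(nn.ReLU())
model = nn.Sequential(*layers)

logits = model(torch.randn(16, dim_input))                  # dummy batch of 16 examples
loss = F.cross_entropy(logits, torch.randint(0, 2, (16,)))  # raw logits, no softmax
```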
Could you please give me some advice?
Many Thanks!
# model definition:
"""
Inputs:
1 ```dim_input``` (int) input dimension\n
2 ```dim_output``` (int) output dimension\n"""
def __init__(self, dim_input : int, dim_output : int, dim_hidden : list[int, ...]):
super().__init__()
self._dims : list[int] = [dim_input] + dim_hidden + [dim_output]
self._ct_layers = len(self._dims)
def build_network(self) -> dict[str, torch.Tensor]:
    parameter: dict[str, torch.Tensor] = {}
    # build layers (feature extractor)
    for (i, dim) in enumerate(self._dims[0:-1]):
        parameter[f'linear_weight_{i}'] = nn.init.xavier_uniform_(
            torch.empty(
                self._dims[i + 1],
                dim,
                requires_grad=True,
                device=self.device))
        parameter[f'linear_bias_{i}'] = nn.init.zeros_(
            torch.empty(
                self._dims[i + 1],
                requires_grad=True,
                device=self.device))
    return parameter
def forward_propagation(self, incoming: torch.Tensor, parameter: dict[str, torch.Tensor]):
    x = incoming
    for i in range(self._ct_layers - 1):
        x = F.linear(
            input=x,
            weight=parameter[f'linear_weight_{i}'],
            bias=parameter[f'linear_bias_{i}'])
        if i != self._ct_layers - 1:
            x = F.relu(x)
    return x
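For context, this is roughly how I build and call the network. The class name ```Net```, the sizes, and the device line are placeholders for this post, since my snippet above omits the class line:
```python
import torch

# 'Net' is a stand-in name; the real class line is omitted from the snippet above.
net = Net(dim_input=300, dim_output=2, dim_hidden=[128, 64])
net.device = torch.device('cpu')               # build_network() reads self.device
meta_parameters = net.build_network()          # dict of linear_weight_i / linear_bias_i tensors

x = torch.randn(16, 300)                       # dummy batch: 16 examples, 300 features
logits = net.forward_propagation(incoming=x, parameter=meta_parameters)
print(logits.shape)                            # torch.Size([16, 2])
```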
Where the error happens (this is the model-agnostic meta-learning (MAML) algorithm, hence the inner loop):
def _inner_loop(self, example: torch.Tensor, label: torch.Tensor, training: bool
                ) -> tuple[dict[str, torch.Tensor], list[float]]:
    """The 'inner loop' adapts the network parameters to ONE task.

    Inputs:
    1. ```example``` (Tensor): task support set inputs,
       shape (number of examples, number of features)
    2. ```label``` (Tensor): task support set labels,
       shape (number of examples,)
    3. ```training``` (bool): whether we are training or evaluating,
       received from ```_outer_step()```
    Returns:
    1. ```adapted_parameter``` (dict[str, Tensor]): adapted network parameters.
    2. ```accuracy_changes``` (list[float]): support set accuracy over the course of
       the inner loop, length num_inner_steps + 1; the last entry tracks post-adaptation accuracy.
    """
# print(f"example shape: {example.shape}")
# print(f"label shape: {label.shape}")
accuracy_changes : list[float] = []
adapted_parameter = {
key: torch.clone(value)
for key, value in self._meta_parameters.items()
}
for _ in range(self._ct_inner_steps):
predicted = self._network.forward_propagation(incoming=example, parameter=adapted_parameter)
assert torch.isnan(predicted).any() == False, "Inner, Nan values in prediction"
assert torch.isnan(label).any() == False, "Inner, Nan values in label"
# print(f"predicted shape: {predicted.shape}")
# print(f"label shape: {label.shape}")
loss = F.cross_entropy(input=predicted, target=label)
assert torch.isnan(loss).any() == False, f"Inner, Nan values in loss, \n predicted {predicted}, \n labels {label}"
# dictionary.values() returns a VIEW of the list, NOT the actual list!
accuracy_changes.append(util.score(logits=predicted, labels=label))
gradients = autograd.grad(outputs=loss, inputs=adapted_parameter.values(), create_graph=training)
for (layer, key) in enumerate(adapted_parameter.keys()):
adapted_parameter[key] = adapted_parameter[key] - self._learning_rate_inner[key] * gradients[layer]
# print(f"Inner loop gradients: {gradients[0:10]}")
# to get a read on the effect of adapatation
predicted = self._network.forward_propagation(incoming=example, parameter=adapted_parameter)
accuracy_changes.append(util.score(logits=predicted, labels=label))
return adapted_parameter, accuracy_changes
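To make "the gradients are 0" concrete, this is the kind of check I could drop in right after the autograd.grad call. The helper below is just a sketch written for this post (name and threshold are mine), not part of my training code:
```python
import torch

def report_zero_gradients(gradients, names, eps: float = 1e-12) -> None:
    """Print the norm of each gradient and flag the ones that are numerically zero."""
    for name, grad in zip(names, gradients):
        norm = grad.norm().item()
        flag = "  <-- zero!" if norm < eps else ""
        print(f"{name}: grad norm = {norm:.3e}{flag}")

# e.g. inside _inner_loop, right after gradients = autograd.grad(...):
# report_zero_gradients(gradients, list(adapted_parameter.keys()))
```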
I checked the forward propagation with the asserts shown above, and none of them ever fires.
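One more thing I plan to try: since torch.isnan does not flag inf, I may switch the asserts to a finiteness check along these lines (the helper name is made up for this post, not in my code yet):
```python
import torch

def assert_finite(name: str, t: torch.Tensor) -> None:
    """Fail as soon as a tensor contains inf or NaN (isnan alone misses inf)."""
    assert torch.isfinite(t).all(), (
        f"{name} contains non-finite values: "
        f"{torch.isinf(t).sum().item()} inf, {torch.isnan(t).sum().item()} NaN")

# e.g. inside forward_propagation, after every F.linear call:
# assert_finite(f"activation_{i}", x)
```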