Warning: NaN or Inf found in input tensor. Why does this happen? I am very confused.
How do you create the dataset and do you apply any preprocessing on it?
Also, which line of code throws this warning?
This is the code that creates the dataset:
class MyDataset(Dataset):
    """Character-level text classification dataset backed by a CSV file.

    The first column of every CSV row is parsed as a 1-based integer class
    label and stored 0-based. All remaining columns are treated as text and
    concatenated, each one followed by a single space.

    A sample is encoded character by character: a character found in
    ``vocabulary`` becomes its position in the vocabulary plus one, while
    characters outside the vocabulary (spaces and uppercase letters
    included) are dropped. Code 0 is reserved for right-padding.
    """

    def __init__(self, data_path, max_length=1024):
        """Read every row of ``data_path`` into memory.

        Args:
            data_path: Path of the CSV file to load.
            max_length: Exact length of every encoded sample; longer texts
                are truncated and shorter ones are padded with zeros.

        Raises:
            ValueError: If the first column of a row is not an integer.
        """
        self.data_path = data_path
        self.vocabulary = list("""abcdefghijklmnopqrstuvwxyz0123456789,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}""")
        # One-time character -> code table so that encoding a sample does not
        # scan the vocabulary list twice per character. ``setdefault`` keeps
        # the first position of a character, matching ``list.index``.
        self._char_to_code = {}
        for code, char in enumerate(self.vocabulary, start=1):
            self._char_to_code.setdefault(char, code)

        texts, labels = [], []
        # newline="" lets the csv module do its own newline handling, which
        # is required for quoted fields that contain line breaks.
        with open(data_path, newline="") as csv_file:
            reader = csv.reader(csv_file, quotechar='"')
            for line in reader:
                if not line:
                    # csv.reader yields [] for a blank line; there is no
                    # label to read, so skip it instead of failing on line[0].
                    continue
                texts.append("".join(field + " " for field in line[1:]))
                labels.append(int(line[0]) - 1)

        self.texts = texts
        self.labels = labels
        self.max_length = max_length
        self.length = len(self.labels)
        self.num_classes = len(set(self.labels))

    def __len__(self):
        """Return the number of samples that were loaded."""
        return self.length

    def __getitem__(self, index):
        """Return ``(codes, label)`` for the sample at ``index``.

        ``codes`` is a 1-D ``np.int64`` array of length ``max_length`` and
        ``label`` is the 0-based integer class of the sample.
        """
        lookup = self._char_to_code
        data = [lookup[char] for char in self.texts[index] if char in lookup]
        # Truncate first, then pad; a non-positive repeat count pads nothing.
        data = data[:self.max_length]
        data += [0] * (self.max_length - len(data))
        return np.array(data, dtype=np.int64), self.labels[index]
And this is the code of the model:
class VDCNN(nn.Module):
    """Character-level convolutional classifier.

    The network embeds integer character codes, runs them through a stack
    made of one ``nn.Conv1d`` followed by ``ConvBlock`` modules, flattens
    the result and classifies it with three fully connected layers.
    """

    def __init__(self, n_classes=14, num_embedding=69, embedding_dim=64, depth=17, n_fc_neurons=1024, shortcut=True):
        """Build the embedding, the convolutional stack and the classifier.

        Args:
            n_classes: Size of the final linear layer's output.
            num_embedding: Number of rows in the embedding table; row 0 is
                the padding index.
            embedding_dim: Width of each embedding vector.
            depth: Controls the stack size; ``(depth - 2) // 2`` extra
                ``ConvBlock`` modules follow the first one.
            n_fc_neurons: Width of the two hidden fully connected layers.
            shortcut: Forwarded unchanged to every ``ConvBlock``.
        """
        super().__init__()
        width = 256

        self.embed = nn.Embedding(
            num_embedding, embedding_dim, padding_idx=0, max_norm=None,
            norm_type=2, scale_grad_by_freq=False, sparse=False,
        )

        # Modules are created in the same order as they are applied.
        conv_stack = [
            nn.Conv1d(embedding_dim, width, kernel_size=3, padding=1),
            ConvBlock(n_filters=width, kernel_size=3, padding=1, shortcut=shortcut, pool=False),
        ]
        conv_stack.extend(
            ConvBlock(n_filters=width, kernel_size=3, padding=1, shortcut=shortcut)
            for _ in range((depth - 2) // 2)
        )

        # NOTE(review): the first linear layer expects 8 * width features,
        # which presumably means the stack leaves 8 positions per channel --
        # confirm against ConvBlock and the input length.
        classifier = [
            nn.Linear(8 * width, n_fc_neurons),
            nn.ReLU(),
            nn.Linear(n_fc_neurons, n_fc_neurons),
            nn.ReLU(),
            nn.Linear(n_fc_neurons, n_classes),
        ]

        self.layers = nn.Sequential(*conv_stack)
        self.fc_layers = nn.Sequential(*classifier)
        self.__init_weights()

    def __init_weights(self):
        """Apply Kaiming-normal initialisation to every ``nn.Conv1d`` weight.

        Modules of any other type are left as they were constructed.
        """
        conv_modules = (module for module in self.modules() if isinstance(module, nn.Conv1d))
        for conv in conv_modules:
            kaiming_normal_(conv.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, input):
        """Run the network on ``input`` and return the classifier output.

        The raw input and the transposed embedding are printed for debugging.
        """
        print('input:', input)
        embedded = self.embed(input)
        # Swap dims 1 and 2 so the embedding axis comes before the
        # position axis, as the convolutional stack is fed this layout.
        embedded = embedded.transpose(1, 2)
        print(embedded)
        features = self.layers(embedded)
        batch_size = features.size(0)
        flattened = features.contiguous().view(batch_size, -1)
        return self.fc_layers(flattened)
After the line `output = self.embed(input)`, `print(output)` shows that `output` already contains `nan` values. The printed output is:
tensor([[[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
...,
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.]],
[[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
...,
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.],
[nan, nan, nan, ..., 0., 0., 0.]],
I am very confused
You might be seeing this warning from the tensorboardX writer, so the issue might be with the logged metrics rather than with the dataset.
Can you confirm that the warning isn’t being generated by the following source: https://github.com/lanpa/tensorboardX/blob/master/tensorboardX/x2num.py (line 13)?
I think you should check the return type of the numpy array. This might be happening because of the type conversion between the numpy array and torch tensor.
I would give one suggestion: the weights of your fc layers are not explicitly initialized, since `__init_weights` only initializes the weights of the `nn.Conv1d` modules.
I encountered this problem too. I checked the data and changed the learning rate, but that didn’t help. I think @arvindsg is right, so I reinstalled and upgraded tensorboard to the latest version (2.11.2), and there are no warnings anymore!