Hi everyone. I am trying to train an LSTM network on the TIMIT dataset for speech recognition.
I am having issues though with the criterion and comparing the output of the model against the labels.
This is how I am preprocessing the data:
1 I get the mfcc values and store them and do the same with the file name which corresponds to the sound label.
mfccs = []
labels = []
for x in os.listdir("phones"):
try:
y, sr = librosa.load("phones/"+x, sr=16000)
test = librosa.feature.mfcc(y, sr, n_mfcc=20, hop_length=50)
#GET THE MFCC VALUES AND ADD LABEL
mfccs.append(test)
l = re.sub("\d", "", x)
labels.append(l)
except:
continue
- I pad the arrays since they have variable size and try to make a onehot encoding of the labels and create training-test sets. Which seems to work.
maxshape = max(len(x[1]) for x in mfccs)
for x in range(len(mfccs)):
mfccs[x] = np.resize(mfccs[x],(20,maxshape))
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
labeler = LabelEncoder()
labeler.fit(np.array(labels))
lbls = labeler.transform(np.array(labels))
onehot = OneHotEncoder(sparse =False)
lbls = lbls.reshape(len(lbls), 1)
onehot = onehot.fit_transform(lbls)
X_train, X_test, y_train, y_test = train_test_split(
mfccs, onehot, test_size=0.20, random_state=42)
print(X_train[0].shape, y_train[0])
import torch.utils.data as utils
my_dataset = utils.TensorDataset(torch.tensor(X_train),torch.tensor(y_train))
my_dataloader = utils.DataLoader(my_dataset)
my_dataset = utils.TensorDataset(torch.tensor(X_test),torch.tensor(y_test))
testloader = utils.DataLoader(my_dataset)
- I try to train the model but get this error:
import torch.optim as optim
lstm1 = Model()
lstm1.train()
criterion = nn.NLLLoss()
optimizer = optim.SGD(lstm1.parameters(), lr=0.001, momentum=.09)
print(lbls[1])
print("OK")
for epoch in range(50): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(my_dataloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, lab = data
#print(inputs, inputs.shape)
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = lstm1(inputs.view(1,1,-1))
print(outputs[0].shape, lab.float().shape)
loss = criterion(outputs, lab.long())
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 2000))
running_loss = 0.0
print('Finished Training')
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-117-e746edaa3d7a> in <module>
20 outputs = lstm1(inputs.view(1,1,-1))
21 print(outputs[0].shape, lab.float().shape)
---> 22 loss = criterion(outputs, lab.long())
23 loss.backward()
24 optimizer.step()
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~/.local/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
202
203 def forward(self, input, target):
--> 204 return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
205
206
~/.local/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1852 if reduction != 'none':
1853 ret = torch._C._nn.nll_loss2d(
-> 1854 input, target, weight, reduction_enum, ignore_index)
1855 else:
1856 out = torch._C._nn.nll_loss2d(
RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at /pytorch/aten/src/THNN/generic/SpatialClassNLLCriterion.c:111
The output layer has the same shape as the onehot encoded labels (26).