Custom Dataset shape problems

andcarnivorous · October 30, 2019, 3:44pm

Hi everyone. I am trying to train an LSTM network on the TIMIT dataset for speech recognition.

I am having issues though with the criterion and comparing the output of the model against the labels.

This is how I am preprocessing the data:
1 I get the mfcc values and store them and do the same with the file name which corresponds to the sound label.

mfccs = []
labels = []

for x in os.listdir("phones"):
    try:
        y, sr = librosa.load("phones/"+x, sr=16000)
        test = librosa.feature.mfcc(y, sr, n_mfcc=20, hop_length=50)
        #GET THE MFCC VALUES AND ADD LABEL
        mfccs.append(test)
        l = re.sub("\d", "", x)
        labels.append(l)
    except:
        continue

I pad the arrays since they have variable size and try to make a onehot encoding of the labels and create training-test sets. Which seems to work.

maxshape = max(len(x[1]) for x in mfccs)

for x in range(len(mfccs)):
    mfccs[x] = np.resize(mfccs[x],(20,maxshape))

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder,OneHotEncoder

labeler = LabelEncoder()
labeler.fit(np.array(labels))
lbls = labeler.transform(np.array(labels))

onehot = OneHotEncoder(sparse =False)
lbls = lbls.reshape(len(lbls), 1)
onehot = onehot.fit_transform(lbls)


X_train, X_test, y_train, y_test = train_test_split(
    mfccs, onehot, test_size=0.20, random_state=42)

print(X_train[0].shape, y_train[0])

import torch.utils.data as utils

my_dataset = utils.TensorDataset(torch.tensor(X_train),torch.tensor(y_train)) 
my_dataloader = utils.DataLoader(my_dataset) 

my_dataset = utils.TensorDataset(torch.tensor(X_test),torch.tensor(y_test)) 
testloader = utils.DataLoader(my_dataset)

I try to train the model but get this error:

import torch.optim as optim
lstm1 = Model()

lstm1.train()
criterion = nn.NLLLoss()
optimizer = optim.SGD(lstm1.parameters(), lr=0.001, momentum=.09)
print(lbls[1])
print("OK")
for epoch in range(50):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(my_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, lab = data
        #print(inputs, inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = lstm1(inputs.view(1,1,-1))
        print(outputs[0].shape, lab.float().shape)
        loss = criterion(outputs, lab.long())
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-117-e746edaa3d7a> in <module>
     20         outputs = lstm1(inputs.view(1,1,-1))
     21         print(outputs[0].shape, lab.float().shape)
---> 22         loss = criterion(outputs, lab.long())
     23         loss.backward()
     24         optimizer.step()

~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/.local/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    202 
    203     def forward(self, input, target):
--> 204         return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
    205 
    206 

~/.local/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1852         if reduction != 'none':
   1853             ret = torch._C._nn.nll_loss2d(
-> 1854                 input, target, weight, reduction_enum, ignore_index)
   1855         else:
   1856             out = torch._C._nn.nll_loss2d(

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at /pytorch/aten/src/THNN/generic/SpatialClassNLLCriterion.c:111

The output layer has the same shape as the onehot encoded labels (26).