Save and Load Model NOT working

Nasr · September 26, 2022, 11:38am

Hey guys, I wrote a model to classify audio using an MFCC transform,
but after training the model I can't use it to predict a class.
I get this error:
x = F.relu(self.fc1(x.reshape(-1,x.shape[1] * x.shape[2]*x.shape[3])))
x = self.dropout5(x)
IndexError: tuple index out of range

ptrblck · September 26, 2022, 3:57pm

I guess your x input tensor doesn’t have 3 dimensions:

# Demonstration: a 2-D tensor only has shape[0] and shape[1].
x = torch.randn(2, 3)
x.shape[0]
x.shape[1]
# Indexing a dimension that does not exist fails:
x.shape[2]
# IndexError: tuple index out of range

so check its shape and make sure you can index it.

Nasr · September 29, 2022, 2:29pm

I checked the size.
The size is: torch.Size([1, 16, 3, 8])
Also, I don't think the problem is in the model itself, because I can train it on my data; what fails is testing the model by recording my own voice and passing it to the model.

ptrblck · September 29, 2022, 4:02pm

This description sounds as if the error is in the test case which you might not have checked yet.

Nasr · October 2, 2022, 5:38am

I use this code for the model:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN2D(nn.Module):
    """Small 2-D CNN classifier: two conv/pool stages followed by three linear layers.

    ``fc1`` expects exactly 384 features per sample, so the feature map left
    after the second pooling stage must flatten to 384 values
    (e.g. 16 channels x 3 x 8).

    Args:
        num_class: Number of output classes (width of the final linear layer).
    """

    def __init__(self, num_class):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(0.3)

        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
        self.dropout2 = nn.Dropout(0.3)

        self.fc1 = nn.Linear(384, 256)
        self.dropout5 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256, 128)
        self.dropout6 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(128, num_class)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_class)``.

        Args:
            x: Input of shape ``(batch, 1, H, W)``. A single unbatched sample
                of shape ``(1, H, W)`` is also accepted and is treated as a
                batch of one, so its output has shape ``(1, num_class)``.
        """
        # An unbatched sample would otherwise yield a 3-D feature map, and the
        # flatten step would fail with "IndexError: tuple index out of range".
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=3)
        x = self.dropout1(x)

        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=3)
        x = self.dropout2(x)

        # Collapse (channels, height, width) into one feature axis per sample.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout5(x)

        x = F.relu(self.fc2(x))
        x = self.dropout6(x)

        return self.fc3(x)

I also use this class for the data loader:

from torch.utils.data import DataLoader,random_split,Dataset

class SpeechDataLoader(Dataset):
    """Dataset pairing each waveform with the index of its label in a label list.

    Args:
        data: Indexable collection of waveforms.
        labels: Label of each waveform, aligned index-for-index with ``data``.
        list_dir: List of known label names; a label's position in this list
            is used as its class index.
        transform: Callable applied to a waveform each time it is accessed.
    """

    def __init__(self, data, labels, list_dir, transform):
        self.data = data
        self.labels = labels
        self.label_dict = list_dir
        self.transform = transform

    def __len__(self):
        """Return the number of waveforms."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed_waveform, class_index)`` for item ``idx``.

        Raises:
            ValueError: If the item's label is not in the label list.
        """
        label = self.labels[idx]
        # Fail with a clear message on an unknown label instead of hitting an
        # unbound local variable at the return statement.
        try:
            out_labels = self.label_dict.index(label)
        except ValueError:
            raise ValueError(
                f"label {label!r} of item {idx} is not in the label list"
            ) from None

        waveform = self.transform(self.data[idx])
        return waveform, out_labels

and these are my train and test functions:

import torch,os
from tqdm import tqdm
import torch.optim as optim


# Best validation accuracy seen so far; test() reads and updates it via `global`.
best_acc=0

def train(net,trainloader,optim,scheduler,criterion,epoch,device):
    """Run one training epoch over ``trainloader`` and print mean loss and accuracy.

    Args:
        net: Model to train; switched to training mode.
        trainloader: Iterable yielding ``(inputs, targets)`` batches.
        optim: Optimizer, stepped once per batch.
        scheduler: Learning-rate scheduler, stepped once per batch.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        device: Device the batches are moved to.
    """
    print("Training")
    net.train()

    running_loss = 0
    n_correct = 0
    n_seen = 0

    for inputs, targets in tqdm(trainloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optim.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optim.step()
        # The scheduler advances after every batch, not once per epoch.
        scheduler.step()

        running_loss += loss.item()
        # Index of the largest logit along dim 1 is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        n_correct += (predicted == targets).sum().item()
        n_seen += targets.size(0)

    mean_loss = running_loss / len(trainloader)
    accuracy = n_correct * 100 / n_seen
    print("Epoch: [{}]  loss: [{:.2f}] Accuracy [{:.2f}] ".format(epoch + 1, mean_loss, accuracy))

def test(net,testloader,optim,criterion,epoch,device,results_txt,model_name):
    """Evaluate ``net`` on ``testloader``, log the result, and checkpoint on a new best.

    Prints the validation loss and accuracy, appends the same line to
    ``results_txt + ".txt"``, and, when the accuracy beats the module-level
    ``best_acc``, saves a checkpoint to ``./checkpoint/<model_name>.t7``.

    Args:
        net: Model to evaluate; switched to evaluation mode.
        testloader: Iterable yielding ``(inputs, targets)`` batches.
        optim: Unused; kept so existing callers keep working.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        epoch: Zero-based epoch index (reported as ``epoch + 1``).
        device: Device the batches are moved to.
        results_txt: Path of the results log, without the ``.txt`` suffix.
        model_name: Base name of the checkpoint file.

    Returns:
        The best accuracy seen so far (the updated ``best_acc``).
    """
    global best_acc
    print("validation")
    net.eval()
    test_loss, total, total_correct = 0, 0, 0

    # Evaluation needs no gradients; disabling autograd avoids building the
    # graph for every batch and keeps memory use down.
    with torch.no_grad():
        for inputs, targets in tqdm(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            total_correct += (predicted == targets).sum().item()

    acc = 100. * total_correct / total
    summary = "Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" % (
        epoch + 1, test_loss / len(testloader), acc)
    print("\n" + summary)

    # Context manager closes the log even if the write fails.
    with open(results_txt + ".txt", "a+") as f:
        f.write(summary + " \n")

    # Save checkpoint when best model
    if acc > best_acc:
        if isinstance(net, torch.nn.DataParallel):
            print("multiple GPU")
            print('Saving Best model...\t\t\tTop1 = %.2f%%' % (acc))
            state = {
                'model': net.module.state_dict(),
                'model1': net.state_dict(),
                'model2': net,
                'acc': acc,
                'epoch': epoch,
            }
        else:
            print("not multiple GPU")
            # NOTE(review): this stores the whole module object, so loading the
            # checkpoint requires the model's class definition to be importable
            # under the same name. Saving net.state_dict() is more portable;
            # the layout is left unchanged so existing loading code still works.
            state = {
                'model': net,
                'acc': acc,
                'epoch': epoch,
            }

        save_point = './checkpoint/'
        os.makedirs(save_point, exist_ok=True)
        torch.save(state, save_point + model_name + '.t7')
        best_acc = acc

    return best_acc

Nasr · October 2, 2022, 5:46am

Then I use this code to load the waveforms and labels:

# Collect every .WAV/.wav file from each class folder under the dataset root.
path_to_text = '/content/dataset_folder/'
dataset_generation_array = []
daataset = []

for n in os.listdir(path_to_text):
    path1 = path_to_text + n + '/'
    dataset_generation_array.extend(glob.glob(path1 + '*.WAV') + glob.glob(path1 + '*.wav'))
random.shuffle(dataset_generation_array)

# Each entry of daataset is [waveform, sample_rate, label].
for i in dataset_generation_array:
    signalss, sample_raterr = torchaudio.load(i)
    # The label is the name of the folder that holds the file. Slicing a single
    # character out of the path only matches labels_dict (which holds full
    # folder names) when every folder name is one character long.
    label = os.path.basename(os.path.dirname(i))
    daataset.append([signalss, sample_raterr, label])

train_audio_path = '/content/dataset_folder/'
labels_dict = os.listdir(train_audio_path)

# Sanity check: plot and play the first sample and show its label.
plt.plot(daataset[0][0].t())
plt.show()
print (labels_dict)
print (daataset[0][2])
# Play back with the sample rate stored for this item, not the rate of
# whichever file happened to be loaded last.
Audio(daataset[0][0], rate=daataset[0][1])

Then I use this to zero-pad the waveforms so that all the data has the same size:

# Split the loaded [waveform, sample_rate, label] entries into waveforms and labels.
wave1 = [entry[0] for entry in daataset]
labels = [entry[2] for entry in daataset]

# Length (size of dim 1) of the longest waveform; 0 when there is no data.
# Named max_len so the built-in max() is not shadowed by a module-level variable.
max_len = max((w.shape[1] for w in wave1), default=0)
#print (max_len)

# Zero-pad every waveform at the start of its last dimension up to max_len,
# so all waveforms end up the same size.
wave = [
    F.pad(input=w, pad=(max_len - w.shape[1], 0), mode='constant', value=0)
    for w in wave1
]

Now I select the model, the transform, and the data loader:

# MFCC feature extractor that the dataset applies to each waveform.
train_audio_transforms = nn.Sequential(torchaudio.transforms.MFCC(log_mels=False))
net = NN2D(num_class=35)

dataset = SpeechDataLoader(wave, labels, labels_dict, train_audio_transforms)

# 80/20 train/test split.
n_total = len(dataset)
split_sizes = [round(n_total * .8), round(n_total * .2)]
traindata, testdata = random_split(dataset, split_sizes)

# Both loaders shuffle and yield one sample per batch.
loader_kwargs = dict(batch_size=1, shuffle=True)
trainloader = torch.utils.data.DataLoader(traindata, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testdata, **loader_kwargs)

Finally, I use this to train:

# Run configuration.
num_epochs = 15
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
result_file = 'results_txt'
net = net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
# train() steps this scheduler once per batch, so it is sized in batches per epoch.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.001,
    steps_per_epoch=len(trainloader),
    epochs=num_epochs,
    anneal_strategy='linear',
)

# Alternate one training epoch with one validation pass; test() returns the
# best accuracy so far. The checkpoint is saved under the name '0'.
for epoch in range(num_epochs):
    train(net, trainloader, optimizer, scheduler, criterion, epoch, device)
    best_acc = test(net, testloader, optimizer, criterion, epoch, device, result_file, '0')

Nasr · October 2, 2022, 6:02am

Up to this step everything works fine,
but when I load the model and try to predict on new data, I get the error I mentioned.

Nasr · October 4, 2022, 5:27am

I found the problem:
the normal shape of the data is [16,3,8],
but batching makes the shape [1,16,3,8] because the batch size is 1.
So for the test data I need to use
inputs = inputs.unsqueeze(0)
to make the shape [1,16,3,8], which solves the problem.
Thanks, guys!