Save and Load Model NOT working

hey guys i wrote model to classification audio and use MFCC transform
but after model training i cant use model to predict class
i got this error:
x = F.relu(self.fc1(x.reshape(-1,x.shape[1] * x.shape[2]*x.shape[3])))
x = self.dropout5(x)
IndexError: tuple index out of range

I guess your x input tensor doesn’t have 3 dimensions:

x = torch.randn(2, 3)
x.shape[0]
x.shape[1]
x.shape[2]
# IndexError: tuple index out of range

so check its shape and make sure you can index it.

i check the size
the size is : torch.size([1,16,3,8])
also i don’t have problem with model because i can train by data and can’t test the model by recording my own voice and take it to the model

This description sounds as if the error is in the test case which you might not have checked yet.

i use these code for the model

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN2D(nn.Module):
    def __init__(self, num_class):
        super(NN2D,self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=8,kernel_size=3,stride=1)
        self.dropout1 = nn.Dropout(0.3) 
    
        self.conv2 = nn.Conv2d(in_channels=8,out_channels=16,kernel_size=3,stride=1)
        self.dropout2 = nn.Dropout(0.3)
                
        self.fc1 = nn.Linear(384, 256)
        self.dropout5 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256,128)
        self.dropout6 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(128, num_class)
        
    def forward(self, x):
        
        x = F.max_pool2d(F.relu(self.conv1(x)),kernel_size=3)
        x = self.dropout1(x)
        
        x = F.max_pool2d(F.relu(self.conv2(x)),kernel_size=3)
        x = self.dropout2(x)
   
        #print(x.shape)
        x = F.relu(self.fc1(x.reshape(-1,x.shape[1] * x.shape[2]*x.shape[3])))
        x = self.dropout5(x)
        
        x = F.relu(self.fc2(x))
        x = self.dropout6(x)
        
        x = self.fc3(x)
        
        #print(x.shape)
        return x 

and also i use this for the data loader:

from torch.utils.data import DataLoader,random_split,Dataset

class SpeechDataLoader(Dataset):
    
    def __init__(self,data,labels,list_dir,transform):
        self.data = data
        self.labels = labels
        self.label_dict = list_dir
        self.transform = transform
            
    def __len__(self):
        return len(self.data)    
    
    def __getitem__(self,idx):
        waveform = self.data[idx]
        
        waveform = self.transform(waveform)

        if self.labels[idx] in self.label_dict:
            out_labels = self.label_dict.index(self.labels[idx])
            
        return waveform, out_labels

and its my train and test function:

import torch,os
from tqdm import tqdm
import torch.optim as optim


best_acc=0

def train(net,trainloader,optim,scheduler,criterion,epoch,device):
    print("Training")
    net.train()
    train_loss = 0
    total = 0
    total_correct = 0
    
    iterator = tqdm(trainloader)
    
    for inputs,targets in iterator:
        
        inputs,targets = inputs.to(device), targets.to(device)
        
        optim.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs,targets)
        loss.backward()
        optim.step()
        scheduler.step()
        
        train_loss += loss.item()
        _,predicted = torch.max(outputs.data,1)
        total_correct += (predicted == targets).sum().item()
        total += targets.size(0)
    
    print("Epoch: [{}]  loss: [{:.2f}] Accuracy [{:.2f}] ".format(epoch+1,train_loss/len(trainloader),
                                                                           total_correct*100/total))

def test(net,testloader,optim,criterion,epoch,device,results_txt,model_name):
    global best_acc
    print("validation")
    net.eval()
    test_loss,total,total_correct = 0,0,0
    
    iterator = tqdm(testloader)
    
    for inputs, targets in iterator:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        total_correct += (predicted == targets).sum().item()

    # Save checkpoint when best model
    acc = 100. * total_correct / total
    print("\nValidation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" %(epoch+1, test_loss/len(testloader), acc))

    f = open(results_txt+".txt","a+")
    f.write("Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%% \n" %(epoch+1, test_loss/len(testloader), acc))
    f.close() 
        
    
    if acc > best_acc:
        
        
        if isinstance(net, torch.nn.DataParallel):
            print("multiple GPU")
            print('Saving Best model...\t\t\tTop1 = %.2f%%' %(acc))
            state = {
                'model':net.module.state_dict(),
                'model1': net.state_dict(),
                'model2': net,
                'acc':acc,
                'epoch':epoch,
            }
        
        else:
            print("not multiple GPU")
            state = {
                    'model':net,
                    'acc':acc,
                    'epoch':epoch,
                    }      
            
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        save_point = './checkpoint/'
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(state, save_point+model_name+'.t7')
        best_acc = acc
        
    return best_acc

then i use this code to load wave and label

path_to_text = '/content/dataset_folder/'
dataset_generation_array = []
daataset = []

for n in os.listdir(path_to_text):
  path1 = path_to_text+n+'/'
  path_generate =glob.glob(path1+'*.WAV')+glob.glob(path1+'*.wav')
  for b in path_generate:
    dataset_generation_array.append(b)
random.shuffle(dataset_generation_array)
for i in dataset_generation_array:
  make_label_with_wave = []
  signalss, sample_raterr = torchaudio.load(i)
  make_label_with_wave.append(signalss)
  make_label_with_wave.append(sample_raterr)
  make_label_with_wave.append(i[len(path_to_text):len(path_to_text)+1])
  daataset.append(make_label_with_wave)
train_audio_path = '/content/dataset_folder/'
labels_dict=os.listdir(train_audio_path)
plt.plot(daataset[0][0].t())
plt.show()
print (labels_dict)
print (daataset[0][2])
Audio(daataset[0][0], rate = sample_raterr)

then use this to do zero padding to make all data same size

wave1 = []
labels = []
wave = []
max = 0
for i in daataset:
  wave1.append(i[0])
  labels.append(i[2])

for i in wave1:
  if max < i.shape[1]:
    max = i.shape[1]
#print (max)
for j in wave1:
  wave.append(F.pad(input=j, pad=(max - j.shape[1], 0), mode='constant', value=0))

now select the model and transform and also data loader

train_audio_transforms = nn.Sequential(torchaudio.transforms.MFCC(log_mels=False))
net = NN2D(num_class=35)

dataset= SpeechDataLoader(wave,labels,labels_dict, train_audio_transforms)
traindata, testdata = random_split(dataset, [round(len(dataset)*.8), round(len(dataset)*.2)])
trainloader = torch.utils.data.DataLoader(traindata, batch_size=1, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size=1, shuffle=True)

at the end use it to do train

num_epochs=15
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
result_file = 'results_txt'
net = net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(),lr=0.001)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                              steps_per_epoch=int(len(trainloader)),
                                              epochs=num_epochs,
                                              anneal_strategy='linear') 

for epoch in range(0, num_epochs):
    train(net,trainloader,optimizer,scheduler,criterion,epoch,device)
    best_acc = test(net,testloader,optimizer,criterion,epoch,device,result_file,'0')

until this step all things works good
but when i want to load the model and predict new data i get the error that i said.

i found the problem:
the normal shape of data is [16,3,8]
but batch make shape: [1,16,3,8] because batch size is: 1
so in test data need to use
inputs = inputs.unsqueeze(0)
to make shape [1,16,3,8] and solve the problem
thanks guys