Problem with accuracy during training

Hi! I have a problem with my network: the training process does not seem to work. Here is my code:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
from data_reader import Data_Loader
import matplotlib.pyplot as plt
import IPython.display as ipd
from torchsummary import summary


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

train_set = Data_Loader("training")
print(len(train_set))
test_set = Data_Loader("testing")


waveform, sample_rate, label, speaker_id, utterance_number = train_set[0]


l = list(set(datapoint[2] for datapoint in train_set))
labels = sorted(l)

new_sample_rate = 8000
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=new_sample_rate)
transformed = transform(waveform)

ipd.Audio(transformed.numpy(), rate=new_sample_rate)

def label_to_index(word):
    return torch.tensor(labels.index(word))


def index_to_label(index):
    return labels[index]


def pad_sequence(batch):
    batch = [item.t() for item in batch]
    batch = torch.nn.utils.rnn.pad_sequence(batch, batch_first=True, padding_value=0.)
    return batch.permute(0, 2, 1)


mel_spectogram = torchaudio.transforms.MelSpectrogram(
    sample_rate = 8000,
    n_fft=1024,
    hop_length=512,
    n_mels=64  
).to(device)

def collate_fn(batch):

    tensors, targets = [], []

    for waveform, _, label, *_ in batch:
        tensors += [waveform]
        targets += [label_to_index(label)]

    tensors = pad_sequence(tensors)
    tensors = mel_process(tensors)
    targets = torch.stack(targets)
    
    return tensors, targets

def mel_process(tensors):
    tensor = []
    for ten in tensors:
        ten = ten.to(device)
        ten = transform(ten)
        data_mel = mel_spectogram(ten)
        tensor.append(data_mel)
    tensor = torch.stack(tensor)
    return tensor
    

batch_size = 106

if device == "cuda":
    num_workers = 1
    pin_memory = True
else:
    num_workers = 0
    pin_memory = False
    

train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

class Lstm_model(nn.Module):
    def __init__(self, input_dim, hidden_size, num_layers, batch_size):
        super(Lstm_model, self).__init__()
        self.num_layers = num_layers
        self.input_size = input_dim
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_size, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, 8)

    def forward(self, x, hn, cn):
        out, (hn, cn) = self.lstm(x, (hn, cn))
        final_out = self.fc(out[-1])
        return final_out, hn, cn

    def predict(self, x):
        hn, cn = self.init()
        out, (hn, cn) = self.lstm(x, (hn, cn))
        final_out = self.fc(out[-1])
        return final_out

    def init(self):
        h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device)
        return h0, c0

input_dim = 8
n_hidden = 50
num_layers = 8
model = Lstm_model(input_dim, n_hidden, num_layers, batch_size)
model.to(device)
print(model)


optimizer = optim.Adam(model.parameters(), lr=0.05)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # reduce the learning rate after 10 epochs by a factor of 10
loss_function = nn.CrossEntropyLoss()
def train(model, epoch, log_interval):
    model.train()
    hn, cn = model.init()
    loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.squeeze(1)
        data = data.to(device)
        data = transform(data)
        target = target.to(device)
        data = torch.reshape(data, (64,batch_size, 8))
        output, hn, cn = model(data, hn, cn)
        #print(output.shape)  
        loss = loss_function(output, target)
        optimizer.zero_grad()
        hn = hn.detach()
        cn = cn.detach()
        loss.backward()
        optimizer.step()


    print(f"Train Epoch: {epoch} Loss: {loss.item():.6f}")



def number_of_correct(pred, target):
    return pred.squeeze().eq(target).sum().item()


def get_likely_index(tensor):
    return tensor.argmax(dim=-1)

def test(model, epoch):
    model.eval()
    correct = 0
    hn, cn = model.init()
    for data, target in test_loader:
        if len(data) < batch_size: continue
        data = data.squeeze(1)
        outputs = []
        data = data.to(device)
        data = transform(data)
        target = target.to(device)
 
        data = torch.reshape(data, (64,batch_size, 8))
        output, hn, cn = model(data, hn, cn) 

        pred = get_likely_index(output)
        correct += number_of_correct(pred, target)


    print(f"\nTest Epoch: {epoch}\tAccuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n")
    return 100. * correct / len(test_loader.dataset)

# n_epoch, log_interval and accuracy are defined earlier in my full script;
# example values are shown here so the snippet runs on its own
n_epoch = 20
log_interval = 20
accuracy = []

for epoch in range(1, n_epoch + 1):
    train(model, epoch, log_interval)
    acc = test(model, epoch)
    accuracy.append(acc)

When I launch the script there is no error, but the accuracy stays at about 12%. It seems like nothing changes during the learning process. I don't know what I am doing wrong. Can someone spot the mistake in my code?

I don’t see any obvious errors in your code, so try to scale down the use case and overfit a small dataset (e.g. just 10 samples) and make sure your model is able to do so.
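Something along these lines could be used as a starting point (an untested sketch based on your posted code; the subset size, learning rate, and epoch count are placeholders I picked, and because your init() and the reshape in train() bake in a fixed batch size, I keep the batch size equal to the subset size and re-create the model with it):

# Overfit check: can the model memorize 10 fixed samples?
from torch.utils.data import Subset

small_batch = 10
small_set = Subset(train_set, list(range(small_batch)))
small_loader = torch.utils.data.DataLoader(
    small_set,
    batch_size=small_batch,
    shuffle=True,
    collate_fn=collate_fn,
)

# Fresh model whose hidden-state init matches the small batch size.
small_model = Lstm_model(input_dim, n_hidden, num_layers, small_batch).to(device)
small_optimizer = optim.Adam(small_model.parameters(), lr=1e-3)

for epoch in range(300):
    for data, target in small_loader:
        data = data.squeeze(1).to(device)
        data = transform(data)
        data = torch.reshape(data, (64, small_batch, 8))
        target = target.to(device)

        hn, cn = small_model.init()  # fresh states for every batch
        output, hn, cn = small_model(data, hn, cn)
        loss = loss_function(output, target)

        small_optimizer.zero_grad()
        loss.backward()
        small_optimizer.step()

    if epoch % 50 == 0:
        print(f"epoch {epoch}: loss {loss.item():.4f}")

If the loss doesn't drop close to zero on these 10 samples, the problem is most likely in the model or the input shaping rather than in the amount of data.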

@ptrblck Unfortunately, when I used a small dataset, the problem was the same. Should I maybe apply min-max scaling to my data (for example to the range -1 to 1)? Could that help?
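I was thinking of something like this (just a sketch; min_max_scale is a name I made up, and I would probably call it on each waveform inside collate_fn, before mel_process):

def min_max_scale(waveform, eps=1e-8):
    # Scale each waveform into [-1, 1] using its own min and max.
    w_min = waveform.min()
    w_max = waveform.max()
    scaled = (waveform - w_min) / (w_max - w_min + eps)  # map to [0, 1]
    return scaled * 2.0 - 1.0                            # map to [-1, 1]

So in collate_fn it would be tensors += [min_max_scale(waveform)] instead of tensors += [waveform].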