None of the samples in one class are detected; what is the reason for this?

Gorgen · May 18, 2022, 9:02pm

Hello everyone,
I don’t know what is wrong with my classification_report. Why is there a category for which none of the samples are detected?
classification_report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        37
           1       0.39      1.00      0.56        42
           2       0.67      0.06      0.11        33

    accuracy                           0.39       112
   macro avg       0.35      0.35      0.22       112
weighted avg       0.34      0.39      0.24       112
What is the reason for this?
Here is all of the code.
My task is audio classification. The loss is criterion = nn.CrossEntropyLoss(), and the optimizer is optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate).
Could you please give me some suggestions?
Thanks.
Best wishes.
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from datasets import SoundDataset

# Device configuration: use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_classes = 3
num_epochs = 1
batch_size = 1
learning_rate = 0.001

# Model dimensions
input_size = 64
sequence_length = 28
hidden_size = 128
num_layers = 2

# Dataset
ANNOTATIONS_FILE = "F:/辅导人/姚文瀚/海天AD语音分类/traindata/data.xls"
AUDIO_DIR = "F:/辅导人/姚文瀚/海天AD语音分类/traindata/audio"
data = SoundDataset(ANNOTATIONS_FILE, AUDIO_DIR)
print(f"There are {len(data)} samples in the dataset.")
features, label = data[0]
print('features', features.shape)
print(label)

# Train/validation split (80% / 20%).
# The validation size is the remainder rather than int(0.2 * length):
# truncating both parts can leave their sum below `length`, and
# random_split raises when the sizes do not add up to the dataset length.
length = len(data)
train_size = int(0.8 * length)
validate_size = length - train_size

# First argument is the dataset to split; the second lists the subset sizes.
train_set, test_set = torch.utils.data.random_split(data, [train_size, validate_size])
print(len(train_set), len(test_set))

# Data loaders
# `batch_size` is the value defined with the other hyper-parameters.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=False,
)

# Debug helpers: print the shape and label of every batch.
# NOTE(review): these loops appear to have been commented out in the original
# script (each line was rendered as a separate heading) — confirm before
# enabling them.
# for i, (features, labels) in enumerate(train_loader):
#     print('features', features.shape)
#     print('labels', labels)
# print('==================================================')
# for i, (features, labels) in enumerate(test_loader):
#     print('features', features.shape)
#     print('labels', labels)

# Model


class LSTM(nn.Module):
    """Sequence classifier: a stacked LSTM followed by a linear layer.

    The class scores are computed from the LSTM output at the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes (size of the logits vector).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True -> input must be (batch_size, seq_length, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes).

        Args:
            x: Tensor of shape (batch_size, seq_length, input_size).
        """
        # Zero initial hidden and cell states, created on the same device and
        # with the same dtype as the input so the module does not depend on a
        # module-level `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros_like(h0)

        # out: (batch_size, seq_length, hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Keep only the output of the last time step: (batch_size, hidden_size)
        out = out[:, -1, :]

        # Logits: (batch_size, num_classes)
        return self.fc(out)

model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
print(model)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
model.train()  # make the training mode explicit
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        # features are presumably (batch, seq, mel) — TODO confirm against SoundDataset
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress every 100 steps.
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')

# Test the model
# In the test phase we don't need to compute gradients (for memory efficiency).
model.eval()
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(features)
        # torch.max returns (values, indices); the indices are the predicted classes.
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    # Report the real number of evaluated samples instead of a hard-coded count.
    print(f'Accuracy of the network on the {n_samples} test samples: {acc} %')

from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sn
import pandas as pd
import numpy as np

# plt
import matplotlib.pyplot as plt

y_pred = []
y_true = []

# Iterate over the test data and collect predictions and ground truth.
model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for features, labels in test_loader:
        features = features.to(device)

        # Forward pass; the argmax of the logits is the predicted class
        # (applying exp first does not change the argmax).
        output = model(features)
        y_pred.extend(output.argmax(dim=1).cpu().numpy())  # Save prediction
        y_true.extend(labels.cpu().numpy())  # Save truth

# Class names.
# NOTE(review): assumes label ids 0, 1, 2 correspond to AD, HC, MCI in this
# order — confirm against the label encoding used by SoundDataset.
classes = ('AD', 'HC', 'MCI')
class_ids = list(range(len(classes)))

# Build the classification report.
# Explicit `labels` keeps every class in the report even when it never occurs
# in y_true or y_pred; zero_division=0 reports 0.0 for classes that are never
# predicted instead of emitting a warning.
cf_report = classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=list(classes),
    zero_division=0,
)
print(cf_report)

# Build the confusion matrix.
# Explicit `labels` guarantees a len(classes) x len(classes) matrix, so the
# DataFrame below always matches the class names.
cf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
# NOTE(review): the "* 10" scaling is kept from the original script; it looks
# like a leftover from a 10-class example — confirm the intended normalisation.
df_cm = pd.DataFrame(
    cf_matrix / np.sum(cf_matrix) * 10,
    index=list(classes),
    columns=list(classes),
)
plt.figure(figsize=(12, 7))
sn.heatmap(df_cm, annot=True)
plt.savefig('output.png')