Accuracy incorrectly reported as 100% when testing PyTorch model

Heya, so I’m new to using Python for building models, and I’ve just sorted out an issue where my loss was dropping to zero.

My problem is that when I test my model, it reports 100% accuracy, which I’m fairly sure isn’t actually the case.

What am I doing wrong here? Any help with this problem, or with any other errors in my code, would be much appreciated.

Please find my code below.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Hyperparameters
batch_size = 32
learning_rate = 0.001
num_epochs = 10

# Load and preprocess the dataset
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),         # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize pixel values
])

masterDatasetPath = 'D:\\VisualStudio - Repos\\303ass1v2\\new-data'

masterDataset = torchvision.datasets.ImageFolder(root=masterDatasetPath, transform=transform)

totalSamples = len(masterDataset)
trainSize = int(0.1 * totalSamples)
testSize = (totalSamples - trainSize)

trainDataset, testDataset = torch.utils.data.random_split(masterDataset, [trainSize, testSize])

train_loader = DataLoader(dataset=trainDataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=testDataset, batch_size=batch_size, shuffle=False)

# Define the MLP model
class HandwrittenDigitClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(HandwrittenDigitClassifier, self).__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.Sigmoid(),
            nn.Linear(32, num_classes))

    def forward(self, x):
        return self.layers(x)

# Instantiate the model
input_size = 64*64*3
num_classes = 35
model = HandwrittenDigitClassifier(input_size, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_step = len(train_loader)

for epoch in range(num_epochs):

    for i, (images, labels) in enumerate(train_loader):
        #images = images.reshape(-1, input_size)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item()}')


print('Training finished!')

# Save the model
torch.save(model.state_dict(), 'handwritten_digit_classifier.pth')

def test_model(model, test_loader, device='cpu'):  # can change 'cpu' to 'cuda' for GPU usage
    """
    Test the accuracy of a model on a given dataset.

    Args:
        model (nn.Module): The PyTorch model to be tested.
        test_loader (DataLoader): DataLoader for the test dataset.
        device (str): Device to run the model on ('cuda' for GPU or 'cpu' for CPU).

    Returns:
        float: The accuracy of the model on the test dataset.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient tracking during inference
        for data in test_loader:
            inputs, targets = data
            inputs, targets = inputs.to(device), targets.to(device)  # Move data to the specified device
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get the class with the highest score
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    return accuracy

print(test_model(model, test_loader))

I don’t see any obvious issues in your code. You could compare the predictions against the targets manually, e.g. by creating a plot, to see the diversity of the data and to double-check whether your model is able to perfectly overfit this dataset.
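
If it helps, here is a minimal sketch of that kind of check, reusing the model, test_loader, train_loader, and test_model from your post. It prints the label distribution of the test split, a few individual predictions next to their targets, and the accuracy on the training set, which tells you whether the model is simply memorizing its (quite small, 10%) training split:

import collections

import torch

model.eval()
all_preds, all_targets = [], []

with torch.no_grad():  # no gradients needed for this inspection
    for inputs, targets in test_loader:
        outputs = model(inputs)
        all_preds.append(outputs.argmax(dim=1))
        all_targets.append(targets)

all_preds = torch.cat(all_preds)
all_targets = torch.cat(all_targets)

# Which classes actually appear in the test split, and which get predicted?
print('target class counts:   ', collections.Counter(all_targets.tolist()))
print('predicted class counts:', collections.Counter(all_preds.tolist()))

# A handful of individual predictions next to the ground truth
for p, t in list(zip(all_preds.tolist(), all_targets.tolist()))[:20]:
    print(f'predicted={p}  target={t}  match={p == t}')

# Accuracy on the training data, reusing your existing helper
print('train accuracy:', test_model(model, train_loader))

If the target counts show only one or two classes, or the predictions match the targets everywhere even on the training data, that would explain the 100% figure.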