CNN script not getting results

I’m just getting started with pytorch. I am trying to do a simple binary classification project with the cats and dogs dataset. After much fumbling around, I was able to get the model to train, but I’m not getting the expected results.

First, the loss starts out way too low. To me, that seems to indicate I’m not measuring loss correctly.

Second, the model just predicts everything as 0.

I’m sure there are many mistakes here, but I would appreciate it if someone could take a look and let me know what I’m doing wrong. Thank you!

import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import make_grid
from torchvision.utils import save_image
from sklearn.model_selection import train_test_split
import os
import numpy as np
from sklearn import preprocessing
import glob
import cv2

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

IMAGE_SIZE = 64
DATA_DIR = "C:\\Users\\joeys\\source\\repos\\pytorch-youtube\\data\\catsdogs\\PetImages\\"
LABELS = ('cat', 'dog')


# custom dataset class
# expects the root folder to have sub folders with class names
# and pictures of classes inside folder
class CustomImageDataset(Dataset):
    def __init__(self):
        self.imgs_path = DATA_DIR
        file_list = glob.glob(self.imgs_path + "*")
        self.data = []
        for class_path in file_list:
            class_name = class_path.split("\\")[-1]
            for img_path in glob.glob(class_path + "\\*.jpg"):
                self.data.append([img_path, class_name])
        self.class_map = {"Dog": 0, "Cat": 1}
        self.img_dim = (IMAGE_SIZE, IMAGE_SIZE)
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, class_name = self.data[idx]
        # this is to handle corrupt images in the dataset
        # could probably be handled better
        try:
            img = cv2.imread(img_path)
            img = cv2.resize(img, self.img_dim)
        except:
            img_path, class_name = self.data[idx+1]
            img = cv2.imread(img_path)
            img = cv2.resize(img, self.img_dim)
        class_id = self.class_map[class_name]
        img_tensor = torch.from_numpy(img)
        img_tensor = img_tensor.permute(2, 0, 1) # not exactly sure what/why for this line
        class_id = torch.tensor([class_id])
        return img_tensor, class_id


# as is, we aren't using these
transform = transforms.Compose(
    [transforms.Resize((64, 64)),
    transforms.ConvertImageDtype(torch.float32),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)),

    ]
)

dataset = CustomImageDataset()
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)


dataiter = iter(dataloader)
train_features, train_labels = dataiter.next() 


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(2704, 128) # only way I got input size was by running code
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
# net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):

        inputs, labels = data
        
        # this is the fix for "expected scalar type Byte but found Float"
        # this seems to completely destroy the features in the image to just white
        inputs = inputs.float()
        optimizer.zero_grad()

        outputs = net(inputs)

        loss = criterion(outputs, torch.max(labels,1)[1])
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.10f}')
            running_loss = 0.0

print("finished")

# save the model
PATH = './custom_trained_model_dogs_cats.pth'
torch.save(net.state_dict(), PATH)

For a typical classification problem I would recommend checking out the ImageNet example: examples/main.py at master · pytorch/examples (github.com) which is a good starting point especially concerning issues such as loading images and preprocessing them to torch tensors.

From there it would be useful to see if your model can overfit a very small training set (e.g., just a few images and check that the loss goes to zero).
As a sanity check, cross-entropy loss should start out at around -ln(1/n_classes), which should be ~0.693 here. If it starts at at a wildly higher or lower number something in the training loop is suspicious.

Further reading (source of the above advice): A Recipe for Training Neural Networks (karpathy.github.io)