Trying to understand if my CNN is training correctly

Hello,

What I have is a CNN to classify between a car and a truck so just binary classification.

I believe my dataset splitting is correct, but I’m stuck on understanding whether what I’m doing in the training loop is right. I’m using a model adapted from an example MNIST CNN, reconfigured for what I think should train a binary classification problem.

One of the problems I have tried fixing (and hope is now correct) is the reshaping of the data to make the loss function happy.

I’ve tried printing what “pred = model(inputs)” returns and noticed that the model only outputs 0s.

# Load the dataset: each CSV row is one flattened grayscale image with its
# class label (car/truck) in the last column.
df = pd.read_csv("...")

# NOTE(review): column 0 is skipped — presumably an index/id column; verify.
X = df.iloc[:, 1:-1].values  # pixel columns, raw 0-255 intensities
y = df.iloc[:, -1].values    # binary labels — assumed 0/1; confirm against the CSV

# X = array([[[135, 133, 131, ..., 166, 166, 165], 1k Images flattened
#             [135, 132, 130, ..., 165, 167, 166], 
#             [131, 128, 127, ..., 165, 166, 167],
#             ...,

# Un-flatten each row back into a 256x256 image (requires exactly 65536 pixel columns).
X = np.reshape(X, (X.shape[0], 256, 256))

# Convert to float tensors; labels must be float because BCEWithLogitsLoss
# compares them against float logits.
X_tensor = torch.from_numpy(X).float()
y_tensor = torch.from_numpy(y).float()

class VehicleDataset(Dataset):
    """Dataset over pre-loaded image tensors and their binary labels.

    Each item is a single-channel image of shape (1, H, W) plus its
    scalar label; the optional transform runs after the channel
    dimension has been added.
    """

    def __init__(self, features, labels, transform=None):
        self.features = features
        self.labels = labels
        self.transform = transform

    def __len__(self):
        # One label per sample.
        return len(self.labels)

    def __getitem__(self, idx):
        # Prepend the channel axis Conv2d expects: (H, W) -> (1, H, W).
        image = self.features[idx].unsqueeze(0)
        if self.transform is not None:
            image = self.transform(image)
        return image, self.labels[idx]

batch_size = 32

# Normalize raw 0-255 pixel values with a dataset-wide mean/std.
# NOTE(review): these statistics appear to be computed over the full dataset
# before splitting, so the test split leaks into the normalization — confirm.
transform = transforms.Compose([
    transforms.Normalize(mean=[118.3676], std=[62.2336])
])

dataset = VehicleDataset(X_tensor, y_tensor, transform=transform)

# 80/20 random train/test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Shuffle only the training data; evaluation order doesn't matter.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

class VehicleCNN(nn.Module):
    """Binary classifier for 256x256 single-channel images.

    Produces one raw logit per sample (shape [batch, 1]); pair it with
    BCEWithLogitsLoss, which applies the sigmoid internally.
    """

    def __init__(self):
        super().__init__()
        # 1x256x256 -> 32x128x128: padding keeps the conv size, pooling halves it.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 32x128x128 -> 64x63x63: no padding shrinks 128 -> 126, pooling halves it.
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: flattened 64*63*63 features down to a single logit.
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=64 * 63 * 63, out_features=600),
            nn.Linear(600, 120),
            nn.Linear(120, 1),
        )

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        # Keep the batch dimension, flatten everything else for the linear head.
        out = torch.flatten(out, 1)
        return self.layer3(out)

model = VehicleCNN()

# Train on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# BCEWithLogitsLoss fuses sigmoid + binary cross-entropy, so the model
# must output raw logits (it does — no final activation in layer3).
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=.0001)

# train
n_epoch = 10
for epoch in range(1, n_epoch + 1):
    train_losses = []
    train_acc = []
    for idx, data in enumerate(train_loader):
        inputs, targets = data
        # BCEWithLogitsLoss expects targets shaped like the logits: [batch, 1].
        targets = targets.reshape((targets.shape[0], 1))

        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        pred = model(inputs)  # raw logits, shape [batch, 1]
        loss = criterion(pred, targets)

        train_losses.append(loss.item())
        # BUG FIX: torch.max(pred, 1)[1] over a single logit column always
        # returns index 0, so the old accuracy was meaningless. Threshold the
        # logit at 0.0 instead (equivalent to sigmoid(pred) > 0.5).
        correct = ((pred > 0.0).float() == targets).sum().item()
        # BUG FIX: divide by the actual batch size, not the nominal
        # batch_size — the last batch of an epoch may be smaller.
        train_acc.append(correct / targets.size(0))

        loss.backward()
        optimizer.step()
    print('epoch: {}, loss:{}, acc:{}'.format(epoch, sum(train_losses)/(idx+1), sum(train_acc)/(idx+1) ))

Let me know if I need to explain anything else.

Using torch.max on a binary output containing logits in the shape [batch_size, 1] does not make sense, since index 0 will always be returned by torch.max(pred, 1)[1].
Apply a threshold to get the predictions. If you want to keep using logits you could just use pred > 0.0, if you want to use probabilities, apply a torch.sigmoid beforehand: torch.sigmoid(pred) > 0.5.