Hi, I’m working with the CIFAR-10 dataset to practice building models. I’ve been tuning the hyperparameters, but I’m still not getting high accuracy or low loss. Does anyone know how I can reach higher accuracy and lower loss on the test dataset, i.e. 80%+ accuracy?
Code:
import matplotlib.pyplot as plt
import os
import torch
from mlxtend.plotting import plot_confusion_matrix
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchmetrics import Accuracy, ConfusionMatrix
from torchvision import datasets, transforms
from tqdm.auto import tqdm
NUM_WORKERS = os.cpu_count()
BATCH_SIZE = 32
class CifarModel(nn.Module):
"""__summary__"""
def __init__(self: "CifarModel", input_shape: int, hidden_units: int, output_shape: int) -> None:
super().__init__()
self.conv_block_1 = nn.Sequential(
nn.Conv2d(in_channels=input_shape, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
# nn.Dropout(p=0.2),
)
self.conv_block_2 = nn.Sequential(
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
# nn.Dropout(p=0.2),
)
self.conv_block_3 = nn.Sequential(
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
# nn.Dropout(p=0.2),
)
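        # Sketch of a common variation (my assumption, not part of this model):
        # adding BatchNorm2d after each conv, e.g.
        #   nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
        #   nn.BatchNorm2d(hidden_units),
        #   nn.ReLU(),
        # is a standard way to speed up and stabilize training on CIFAR-10.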
        # After the two MaxPool2d(2) blocks used in forward(), the 32x32 input
        # is downsampled to 8x8 (it would be 4x4 if conv_block_3 were enabled).
        dim = 8
self.classifier_layer = nn.Sequential(
            nn.Flatten(),
nn.Linear(in_features=hidden_units * dim * dim, out_features=hidden_units * dim * dim),
nn.ReLU(),
nn.Dropout(p=0.2),
nn.Linear(in_features=hidden_units * dim * dim, out_features=output_shape)
)
def forward(self, x):
x = self.conv_block_1(x)
x = self.conv_block_2(x)
        # x = self.conv_block_3(x)  # if re-enabled, set dim = 4 in __init__
x = self.classifier_layer(x)
return x
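# torchinfo's summary() can sanity-check the layer shapes, e.g.:
# summary(CifarModel(input_shape=3, hidden_units=10, output_shape=10),
#         input_size=(BATCH_SIZE, 3, 32, 32))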
if __name__ == "__main__":
train_transform = transforms.Compose([
transforms.ToTensor(),
transforms.RandomHorizontalFlip(),
# transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
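    # If the Normalize above is re-enabled, the per-channel CIFAR-10 statistics
    # commonly quoted (worth verifying against the training set) are roughly:
    # mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616)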
test_transform = transforms.Compose([
transforms.ToTensor(),
])
train_data = datasets.CIFAR10(root="data", train=True, download=True, transform=train_transform)
test_data = datasets.CIFAR10(root="data", train=False, download=True, transform=test_transform)
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
classes = train_data.classes
model = CifarModel(input_shape=3, hidden_units=10, output_shape=len(classes))
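    # This runs on CPU as written; a typical speedup is moving the model and
    # each batch to a GPU when one is available, e.g.:
    # device = "cuda" if torch.cuda.is_available() else "cpu"
    # model = model.to(device)  # and X, y = X.to(device), y.to(device) per batch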
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
accuracy_fn = Accuracy(task="multiclass", num_classes=len(classes))
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
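    # Note: with step_size=30 and only 20 epochs, this StepLR would never fire
    # even if scheduler.step() (commented out below) were called each epoch.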
epochs = 20
for epoch in range(epochs):
train_loss, train_acc = 0, 0
model.train()
for batch, (X, y) in enumerate(tqdm(train_dataloader, total=len(train_dataloader), position=0, desc="Training")):
y_pred = model(X)
loss = loss_fn(y_pred, y)
            train_loss += loss.item()  # .item() detaches, so the graph isn't kept alive all epoch
            train_acc += accuracy_fn(y_pred, y).item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
# scheduler.step()
train_loss /= len(train_dataloader)
train_acc /= len(train_dataloader)
print(f"Epoch: {epoch} | Train loss: {train_loss:.5f} | Train acc: {train_acc * 100:.2f}%")
test_loss, test_acc = 0, 0
model.eval()
with torch.inference_mode():
for X_test, y_test in test_dataloader:
test_pred = model(X_test)
                test_loss += loss_fn(test_pred, y_test).item()
                test_acc += accuracy_fn(test_pred, y_test).item()
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)
print(f"Test loss: {test_loss:.2f} | Test accuracy: {test_acc * 100:.2f}%")
    # Saving the state_dict (rather than the whole pickled module) is the
    # recommended PyTorch practice:
    torch.save(model.state_dict(), "model.pt")
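To load the weights back later, something like this should work (a minimal sketch, matching the constructor arguments above):

model = CifarModel(input_shape=3, hidden_units=10, output_shape=10)
model.load_state_dict(torch.load("model.pt"))
model.eval()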