Hi everyone, as a beginner in deep learning, I am trying to recover classical neural networks manually.
Since imagenet is too large, I chose the smaller CIFAR10, I take the designed model from torchvision.models
and modify its final fully connected layer to fit the output.
First I tried alexnet and achieved 70.5% classification accuracy after convergence. Then I tried vgg16(in the order of models being published), and gratifyingly it improved the accuracy to 84.5%. I tried migration learning using torchvision’s pre-trained model, which increased the accuracy by an additional two percentage points.
Finally I tried ResNext, which I think is a relatively newer network and should yield better results. But after 20 epochs of training, it converged to 62% accuracy, even worse than Alexnet.
I would like to know what caused this and if it was due to my use error. Here’s the full code:
import os,sys,time,pickle,random
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torchvision import datasets, models
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision.transforms import ToTensor, Lambda, Resize, Compose, InterpolationMode
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
torch.backends.cudnn.benchmark=True
# Download training data from open datasets.
training_data = datasets.CIFAR10(
root=".\\data\\cifar10",
train=True,
download=True,
transform=Compose([
Resize((64, 64), InterpolationMode.BICUBIC),
ToTensor()
])
)
# Download test data from open datasets.
test_data = datasets.CIFAR10(
root=".\\data\\cifar10",
train=False,
download=True,
transform=Compose([
Resize((64, 64), InterpolationMode.BICUBIC),
ToTensor()
])
)
def imshow(training_data):
labels_map = {
0: "plane",
1: "car",
2: "bird",
3: "cat",
4: "deer",
5: "dog",
6: "frog",
7: "horse",
8: "ship",
9: "truck",
}
cols, rows = 3, 3
figure = plt.figure(figsize=(8,8))
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(training_data), size=(1,)).item()
img, label = training_data[sample_idx]
img = img.swapaxes(0,1)
img = img.swapaxes(1,2)
figure.add_subplot(rows, cols, i)
plt.title(labels_map[label])
plt.axis("off")
plt.imshow(img)
plt.show()
# imshow(training_data)
def train_loop(dataloader, net, loss_fn, optimizer):
size = len(dataloader)
train_loss = 0
for batch_idx, (X, tag) in enumerate(dataloader):
X, tag = X.to(device), tag.to(device)
pred = net(X)
loss = loss_fn(pred, tag)
train_loss += loss.item()
# Back propagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss /= size
return train_loss
def test_loop(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
return test_loss, correct
net = models.resnext50_32x4d().to(device)
net.fc = nn.Linear(2048, 10).to(device)
learning_rate = 0.01
batch_size = 128
weight_decay = 0.0005
train_dataloader = DataLoader(training_data, batch_size = batch_size)
test_dataloader = DataLoader(test_data, batch_size = batch_size)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = learning_rate)
epochs = 20
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
st_time = time.time()
train_loss = train_loop(train_dataloader, net, loss_fn, optimizer)
test_loss, correct = test_loop(test_dataloader, net, loss_fn)
print(f"Train loss: {train_loss:>8f}, Test loss: {test_loss:>8f}, Accuracy: {(100*correct):>0.1f}%, Epoch time: {time.time() - st_time:.2f}s\n")
print("Done!")
torch.save(net.state_dict(), 'resnext1-50_32x4d.model')