I have been trying to use transger learning in google net on the vgg-face dataset and it takes too long and does not seem to progress in learning eventhiugh the learning_step is quite low
Here is the code on getting the features form google net and trianing the model:
class cvv_train(nn.Module):
def __init__(self, num_classes):
super(cvv_train, self).__init__()
# import googlenet
googletnet = torch.hub.load('pytorch/vision:v0.10.0', 'googlenet', pretrained=True)
# feature extraction
self.feature_extractor = nn.Sequential(*list(googletnet.children())[:-2])# pool5-drop_7x7_s1
self.final = nn.Sequential(
nn.Linear(googletnet.fc.in_features, num_classes), nn.BatchNorm1d(num_classes, momentum=0.01)
)
def forward(self, x):
#no gradient/backpropagation for learning
with torch.no_grad():
x = self.feature_extractor(x) #assume: 1024x1x1
x = x.view(x.size(0), -1) #assume: 1024x1
return self.final(x) #assume: 512x1
for epoch in range(100):
epoch_metrics = {'loss': [], 'acc':[]}
prev_time = time.time()
print(f"--- Epoch {epoch} ---")
for batch_i, (X, y) in enumerate(train_dataloader):
if X.size(0) == 1:
continue
image_sequences = Variable(X.to(device), requires_grad=True)
labels = Variable(y[0].to(device), requires_grad=False)
optimizer.zero_grad()
# Get sequence predictions
predictions = model(image_sequences)
del image_sequences
# Compute metrics
loss = cls_criterion(predictions, labels)
acc = 100 * (predictions.detach().argmax(1) == labels).cpu().numpy().mean()
del labels
loss.backward()
optimizer.step()
# Keep track of epoch metrics
epoch_metrics["loss"].append(loss.item())
epoch_metrics["acc"].append(acc)
# Determine approximate time left
batches_done = epoch * len(train_dataloader) + batch_i
batches_left = 20 * len(train_dataloader) - batches_done
time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
prev_time = time.time()
# Print log
sys.stdout.write(
"\r[Epoch %d/%d] [Batch %d/%d] [Loss: %f (%f), Acc: %.2f%% (%.2f%%)] ETA: %s"
% (
epoch,
20,
batch_i,
len(train_dataloader),
loss.item(),
np.mean(epoch_metrics["loss"]),
acc,
np.mean(epoch_metrics["acc"]),
time_left,
)
)
# Empty cache
if torch.cuda.is_available():
torch.cuda.empty_cache()
# Evaluate the model on the test set
test_model(epoch)
# Save model checkpoint
if epoch % 5 == 0:
os.makedirs("model_checkpoints", exist_ok=True)
torch.save(model.state_dict(), '/content/gdrive/My Drive/ColabNotebooks/Emotion_rec_2/cnn_training/{}.pth'.format(epoch))
exp_lr_scheduler.step()
# shuffle train_dataloader again
train_dataloader = DataLoader(train_set_load, batch_size=32, shuffle=True, num_workers=4)
Here are some extra lines of code:
cls_criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
However after more than 150 epochs the loss decreased from 6.1 to 4.2 but it can get down to 3.7 sometimes ) per epoch.
I am training the model on 302 classes (less than the available classes in vgg-face) However the accuaracy does not exceed 27%.
Am I doing something wrong or do you have any suggestions?