GoogleNet transfer learning

I have been trying to use transfer learning with GoogLeNet on the VGG-Face dataset, but training takes too long and the model does not seem to make progress, even though the learning rate is quite low.
Here is the code for extracting the features from GoogLeNet and training the model:

class cvv_train(nn.Module):
    """Classifier with a frozen pretrained GoogLeNet backbone and a new head.

    The backbone (``feature_extractor``) is never trained: it runs under
    ``torch.no_grad()`` and is permanently kept in eval mode so that its
    BatchNorm layers use (and do not overwrite) the pretrained running
    statistics. Only ``final`` (Linear + BatchNorm1d) is trainable.

    Args:
        num_classes: Number of output classes of the new head.

    NOTE(review): wrapping ``children()`` in ``nn.Sequential`` bypasses
    whatever GoogLeNet's own ``forward`` does besides calling its layers in
    order (e.g. any input re-normalisation) -- confirm that the input
    preprocessing matches what the pretrained weights expect.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pretrained GoogLeNet from the torchvision hub.
        googlenet = torch.hub.load('pytorch/vision:v0.10.0', 'googlenet', pretrained=True)

        # Everything except the last two child modules (presumably the
        # dropout and the original fc classifier -- TODO confirm).
        self.feature_extractor = nn.Sequential(*list(googlenet.children())[:-2])
        self.feature_extractor.eval()

        # New classification head, sized from the original fc layer's input.
        self.final = nn.Sequential(
            nn.Linear(googlenet.fc.in_features, num_classes),
            nn.BatchNorm1d(num_classes, momentum=0.01),
        )

    def train(self, mode=True):
        """Switch the head between train/eval; the backbone always stays in eval.

        ``torch.no_grad()`` only disables gradient tracking. Without this
        override, ``model.train()`` would put the backbone's BatchNorm layers
        in training mode, making them normalise with batch statistics and
        update their running statistics on every forward pass.
        """
        super().train(mode)
        self.feature_extractor.eval()
        return self

    def forward(self, x):
        """Return the head's output of shape (batch, num_classes) for ``x``."""
        # No gradient/backpropagation through the frozen backbone.
        with torch.no_grad():
            x = self.feature_extractor(x)  # assume: (batch, 1024, 1, 1)
        x = torch.flatten(x, 1)  # assume: (batch, 1024)
        return self.final(x)
# Training driver: runs `num_epochs` epochs over `train_dataloader`, logs
# running loss/accuracy with an ETA, evaluates after every epoch and saves a
# checkpoint every 5th epoch.
num_epochs = 100  # single source of truth for the loop, the ETA and the log
checkpoint_dir = '/content/gdrive/My Drive/ColabNotebooks/Emotion_rec_2/cnn_training'

for epoch in range(num_epochs):
    # Re-assert training mode every epoch: if test_model() switches the model
    # to eval mode, later epochs would otherwise train in eval mode.
    model.train()

    epoch_metrics = {'loss': [], 'acc': []}
    prev_time = time.time()
    print(f"--- Epoch {epoch} ---")
    for batch_i, (X, y) in enumerate(train_dataloader):
        # BatchNorm1d in the head cannot normalise a batch of one in train mode.
        if X.size(0) == 1:
            continue

        # Inputs do not need gradients, and the deprecated Variable wrapper is
        # unnecessary: plain tensors carry autograd state themselves.
        image_sequences = X.to(device)
        # NOTE(review): y[0] assumes the dataset yields a sequence whose first
        # element is the label tensor -- confirm against the dataset class.
        labels = y[0].to(device)

        optimizer.zero_grad()

        # Get sequence predictions
        predictions = model(image_sequences)
        del image_sequences

        # Compute metrics
        loss = cls_criterion(predictions, labels)
        acc = 100 * (predictions.detach().argmax(1) == labels).cpu().numpy().mean()
        del labels

        loss.backward()
        optimizer.step()

        # Keep track of epoch metrics
        batch_loss = loss.item()
        epoch_metrics["loss"].append(batch_loss)
        epoch_metrics["acc"].append(acc)

        # Determine approximate time left, based on the last batch's duration
        batches_done = epoch * len(train_dataloader) + batch_i
        batches_left = num_epochs * len(train_dataloader) - batches_done
        time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
        prev_time = time.time()

        # Print log
        sys.stdout.write(
            "\r[Epoch %d/%d] [Batch %d/%d] [Loss: %f (%f), Acc: %.2f%% (%.2f%%)] ETA: %s"
            % (
                epoch,
                num_epochs,
                batch_i,
                len(train_dataloader),
                batch_loss,
                np.mean(epoch_metrics["loss"]),
                acc,
                np.mean(epoch_metrics["acc"]),
                time_left,
            )
        )

    # Release cached GPU memory once per epoch; doing it after every batch
    # forces the allocator to re-request memory and slows training down.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Evaluate the model on the test set
    test_model(epoch)

    # Save model checkpoint (create the directory that is actually written to)
    if epoch % 5 == 0:
        os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), os.path.join(checkpoint_dir, f"{epoch}.pth"))
    exp_lr_scheduler.step()

    # Rebuild the loader for the next epoch. NOTE(review): this is redundant
    # if the initial loader was already created with shuffle=True, since such
    # a loader reshuffles on every epoch -- kept to preserve behaviour.
    train_dataloader = DataLoader(train_set_load, batch_size=32, shuffle=True, num_workers=4)
        

Here are some extra lines of code:

# Loss, optimizer and learning-rate schedule for the classifier.
cls_criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Decay the learning rate 10x every 30 epochs (1e-4 -> 1e-5 -> 1e-6 -> 1e-7
# over 100 epochs). The previous step_size=7 divided the rate by 10 fourteen
# times in 100 epochs (down to ~1e-18), so the model effectively stopped
# learning after roughly 20 epochs.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

However, after more than 150 epochs the loss has only decreased from 6.1 to 4.2 (it occasionally gets as low as 3.7 in an epoch).
I am training the model on 302 classes (fewer than the number of classes available in VGG-Face). However, the accuracy does not exceed 27%.
Am I doing something wrong or do you have any suggestions?