Transfer Learning: Unknown bug

Hey community,
I have downloaded a pretrained ResNet50 (I also tried other architectures) for image classification and replaced the last layer (the classifier) so it can classify my 5 image classes. However, the model overfits: even after trying a simpler classifier head and adding dropout layers, it still fails to generalize and gives very low accuracy on the test data (around 10%).

so help please :frowning:

Here is the code:

```python
# Imports needed by the code below
import os
import time
import copy

import torch
import torch.nn as nn
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=500, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=500),  # ImageNet standards
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=500),
        transforms.CenterCrop(size=500),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'cassava-disease'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32,
                                              shuffle=True, num_workers=0)
               for x in ['train', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and a test phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            #if phase == 'train':
            #    scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model, 'model.pth')
                print('***Model Saved!***')

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

model_ft = models.resnet50(pretrained=True)

print(model_ft)  # inspect the architecture

import adabound
#model_ft = models.vgg16(pretrained=True)
#model_ft.aux_logits=False

num_ftrs = model_ft.fc.in_features

# Freeze the pretrained backbone so only the new classifier head is trained
for param in model_ft.parameters():
    param.requires_grad = False

# Here the size of each output sample is set to 5 (one output per class)
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 256),
                            nn.ReLU(),
                            nn.Dropout(0.4),
                            nn.Linear(256, 128),
                            nn.ReLU(),
                            nn.Dropout(0.4),
                            nn.Linear(128, 5),
                            nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()

# Note that only the parameters of the new classifier head are optimized
optimizer_ft = adabound.AdaBound(model_ft.fc.parameters(), lr=1e-3, final_lr=0.1)

#optimizer_ft = optim.Adam(model_ft.classifier.parameters(), lr=0.003)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = model_ft.to(device)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=100)
```

@ptrblck_de any idea please!

Overfitting can generally be reduced by adding regularization to the model (e.g. through dropout, weight decay, more aggressive data augmentation, etc.). Also make sure the training, validation, and test splits are sampled from the same domain.
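As a minimal sketch of the weight decay idea (the learning rate and decay strength below are placeholder values, not tuned for this dataset), you can pass `weight_decay` directly to the optimizer:

```python
import torch.optim as optim

# Illustrative only: L2 regularization via the optimizer's weight_decay argument.
# lr and weight_decay are placeholder values, not recommendations for this dataset.
optimizer_ft = optim.Adam(model_ft.fc.parameters(), lr=1e-3, weight_decay=1e-4)
```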
Which augmentations to use also depends on the actual use case. E.g. while heavy rotations might be beneficial for images of galaxies, they might not be a good idea for landscape scenes.
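For leaf photos such as the cassava images, a milder training pipeline could look roughly like the sketch below; the crop size and jitter strengths are assumptions, not values from this thread:

```python
from torchvision import transforms

# Sketch of a milder augmentation pipeline for leaf images (all values are assumptions)
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),  # 224 is the common ImageNet input size
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),      # only slight photometric noise
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])
```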