Following FinetuningVFeatureExtracting but on a different dataset.
I am feature extracting on the CIFAR_10 dataset by trying out a bunch of different models. Specifically
these ones: [‘resnet’, ‘alexnet’, ‘densenet’, ‘squeezenet’, ‘inception’, ‘vgg’]. Plotting Loss and accuracy for train and validation datasets.
Initial Configuration of hyperparameters and other paraphernalia pertaining to setting up the models.
# Number of epochs the training loop runs for.
num_epochs = 20
# Architecture to build; must be one of the names handled by initialize_model.
model_name = 'squeezenet'
# Number of output classes of the new classifier head (CIFAR-10 has 10).
num_classes = 10
# True: freeze the pretrained weights and train only the new classifier head.
feature_extract=True
The list of models to choose from as mentioned are:
['resnet', 'alexnet', 'densenet', 'squeezenet', 'inception', 'vgg'].
I understand that in feature extraction the parameters of the original pre-trained
model from the above list are frozen, i.e. their gradients are turned off, which I have done with the following piece of code.
def set_parameters_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when feature extracting.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        feature_extracting: When true, ``requires_grad`` is set to ``False``
            on every parameter. When false, the model is left untouched.
    """
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
The re-initialized parameters of the reshaped final classifier layer will by default have requires_grad set to
True. The classifier's output has as many units as there are classes in the dataset. Here is the code for initialization.
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a model whose final classifier layer outputs ``num_classes`` units.

    For every supported architecture the steps are the same: construct the
    model, optionally freeze its existing parameters, then replace the final
    classification layer with a freshly initialised one.

    Args:
        model_name: One of ``'resnet'``, ``'vgg'``, ``'alexnet'``,
            ``'squeezenet'``, ``'densenet'`` or ``'inception'``.
        num_classes: Number of output units of the new classifier layer.
        feature_extract: Forwarded to ``set_parameters_grad``; when true the
            existing parameters are frozen before the head is replaced.
        use_pretrained: Forwarded as ``pretrained=`` to the model constructor.

    Returns:
        ``(model_ft, input_size)``: the reshaped model and the input image
        side length recorded for that architecture.

    An unknown ``model_name`` prints a message and exits the interpreter.
    """
    model_ft = None
    input_size = 0

    # NOTE: in every branch set_parameters_grad runs *before* the head is
    # replaced, so the new layer keeps the default requires_grad=True.
    if model_name == 'resnet':
        # ResNet-34
        model_ft = models.resnet34(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        num_features = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_features, num_classes)
        input_size = 224
    elif model_name == 'vgg':
        # VGG-11 with batch normalisation
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        num_features = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_features, num_classes)
        input_size = 224
    elif model_name == 'alexnet':
        # AlexNet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        num_features = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_features, num_classes)
        input_size = 224
    elif model_name == 'squeezenet':
        # SqueezeNet 1.0: the classifier is a 1x1 convolution, not a Linear.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224
    elif model_name == 'densenet':
        # DenseNet-121
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        num_features = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_features, num_classes)
        input_size = 224
    elif model_name == 'inception':
        # Inception v3: both the auxiliary and the primary heads are replaced.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameters_grad(model_ft, feature_extract)
        aux_features = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(aux_features, num_classes)
        num_features = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_features, num_classes)
        # Inception v3 takes 299x299 images (not 224 like the others).
        input_size = 299
    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size
Calling initialize model
# Build the architecture selected by model_name with its head resized to num_classes.
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
Fine-tuning vs Feature Extraction
If feature_extract is true, the weight and bias of the classifier are trained.
Otherwise everything is trained, because set_parameters_grad() does not turn off any gradients when feature_extract evaluates to False.
# Gather the parameters that the optimizer should update and list their names.
print("Params to learn:")
if feature_extract:
    # Feature extraction: keep only the parameters that are still trainable.
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)
else:
    # Fine-tuning: hand every parameter to the optimizer. Stored as a list
    # (not the one-shot generator from parameters()) so that it can be
    # iterated more than once without silently coming up empty.
    params_to_update = list(model_ft.parameters())
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print("\t", name)
Params to learn:
classifier.1.weight
classifier.1.bias
Optimizing the collected parameters.
# SGD with momentum over the parameters gathered in params_to_update.
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
Train model function.
Here is my train_model() function extended to hold train and validation accuracy as well.
def train(model, dataloaders, optimizer, criterion, num_epochs, is_inception, device='cuda'):
    """Train ``model`` and record loss/accuracy for the train and valid phases.

    Args:
        model: Network to train. Only the batches are moved to ``device``
            here, so the model must already be on that device.
        dataloaders: Mapping with ``'train'`` and ``'valid'`` keys, each an
            iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer; stepped once per training batch.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
            Note the positional order: ``optimizer`` comes *before*
            ``criterion``.
        num_epochs: Number of passes over both phases.
        is_inception: When true, the training-phase forward pass is unpacked
            as ``(outputs, aux_outputs)`` and the loss is
            ``main + 0.4 * auxiliary``.
        device: Device the batches are moved to.

    Returns:
        ``(best_model_wts, train_loss_history, val_loss_history,
        train_acc_history, val_acc_history)``. ``best_model_wts`` is a copy
        of the state dict from the epoch with the highest validation
        accuracy (it is also loaded into ``model`` before returning); the
        histories are lists of per-epoch floats.
    """
    start = time.time()

    train_loss_history, train_acc_history = [], []
    val_loss_history, val_acc_history = [], []

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('==' * 20)

        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            # Count the samples actually seen instead of relying on an
            # external size table; stays correct with samplers/drop_last.
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    if is_inception and is_training:
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = outputs.max(dim=1)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                # loss is a batch mean; weight it by batch size so that the
                # epoch value is a per-sample average.
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_samples += batch_size

            denominator = max(num_samples, 1)  # guard against an empty loader
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator
            print("{} Loss: {} Acc: {}".format(phase, epoch_loss, epoch_acc))

            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            if is_training:
                train_loss_history.append(epoch_loss)
                train_acc_history.append(epoch_acc)
            else:
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)

    time_elapsed = time.time() - start
    print('Time taken to train {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('best val acc: {:.4f}'.format(best_acc))

    # Leave the model holding its best validation weights.
    model.load_state_dict(best_model_wts)
    return best_model_wts, train_loss_history, val_loss_history, train_acc_history, val_acc_history
When I call my train function I get the “SGD object is not callable” error, which has been wracking my nerves for quite some time.
criterion = nn.CrossEntropyLoss()
# NOTE(review): the training function's signature is
# (model, dataloaders, optimizer, criterion, ...), but this call passes
# criterion and optimizer_ft positionally in the opposite order. Inside the
# function the name `criterion` is therefore bound to the SGD optimizer, and
# `criterion(outputs, labels)` tries to call it.
# The function above is also defined as `train`, while this call uses `train_model`.
model_ft, train_loss_history, val_loss_history, train_acc_history, val_acc_history =
train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))
The error it throws is
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-45-9f1089dd9a39> in <module>()
1
2 criterion = nn.CrossEntropyLoss()
----> 3 model_ft, train_loss_history, val_loss_history, train_acc_history, val_acc_history =
train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs,
is_inception=(model_name=="inception"))
<ipython-input-26-36ea4e6ad598> in train_model(model, dataloaders, optimizer, criterion, num_epochs, is_inception, device)
46 else:
47 outputs = model.forward(inputs)
---> 48 loss = criterion(outputs, labels)
49
50 _, preds = outputs.max(dim=1)
TypeError: 'SGD' object is not callable
-
params_to_update contains only the parameters that have requires_grad set to True, i.e. the ones that need to be optimized, as pointed out earlier. To my knowledge I am not calling SGD as a function or method, so I don’t know what Python is trying to tell me.
-
Does model_name==“inception” in the inception flag change the model to inception? And is something going wrong there? That shouldn’t be, cause model_name was set to “squeezenet”, a bit confused here.
This post seems to be about the same thing. The person seems to be filtering those parameters which have requires_grad == True. Which has been done here as well right?
optimizer_conv = torch.optim.SGD((filter(lambda p: p.requires_grad, model.parameters())), lr=0.001, momentum=0.9)
Which is the same as storing the ones with requires_grad in params_to_update? Please help, thank you so much for your time.