I trained a VGG16 model from scratch, but I see that the weights and biases didn't change after training. In addition, the accuracy remains constant, as does the loss. Does anyone have an idea why this could be happening? I tried batch normalization, normalizing images, standardizing images, and data augmentation, but the results don't improve. Even transfer learning wasn't that helpful.
Thanks in advance!
Here is the code I used. When I printed the average weights and biases, they didn't change at all after training.
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of *model* when feature_extracting is True.

    Setting requires_grad to False excludes the parameters from gradient
    computation, so the backbone receives no updates during training.
    When feature_extracting is False the model is left untouched.
    """
    if not feature_extracting:
        return
    for p in model.parameters():
        p.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True, hidden_units=256):
    """Create a torchvision model with its classification head replaced.

    Parameters
    ----------
    model_name : str
        One of "resnet", "alexnet", "vgg", "squeezenet", "densenet", "inception".
    num_classes : int
        Number of output classes of the new head.
    feature_extract : bool
        If True, freeze all backbone parameters so only the new head trains.
    use_pretrained : bool, optional
        Load ImageNet weights when True (default).
    hidden_units : int, optional
        Width of the hidden layer in the replacement head. The original code
        reused ``input_size`` here before it was assigned (it was still 0),
        which produced a zero-width hidden layer — the classifier could never
        learn and the loss stayed constant.

    Returns
    -------
    (model_ft, input_size) : tuple
        The configured model and the expected square input resolution.
    """

    def _make_head(in_features):
        # Hidden Linear -> ReLU -> Dropout -> output Linear.
        # The output width now follows num_classes instead of a hard-coded 5.
        return nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_units, num_classes),
        )

    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # Resnet50
        model_ft = models.resnet50(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.fc = _make_head(model_ft.fc.in_features)
        input_size = 224
    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = _make_head(model_ft.classifier[6].in_features)
        input_size = 224
    elif model_name == "vgg":
        # VGG16
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = _make_head(model_ft.classifier[6].in_features)
        input_size = 224
    elif model_name == "squeezenet":
        # Squeezenet
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # squeezenet1_0's final conv takes 512 input channels; the original
        # passed input_size (still 0) as in_channels, which is invalid.
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224
    elif model_name == "densenet":
        # Densenet
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = _make_head(model_ft.classifier.in_features)
        input_size = 224
    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, num_classes)
        # Handle the primary net
        model_ft.fc = _make_head(model_ft.fc.in_features)
        input_size = 299  # was 512; inception_v3 expects 299x299 inputs
    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size
# Initialize the model for this run (VGG16 trained from scratch).
model1, input_size = initialize_model(model_name = "vgg", num_classes = 5, feature_extract=False, use_pretrained=False)

# Print the model we just instantiated
print(model1)


def print_feature_param_means(net):
    """Print the mean of every conv layer's weight and bias in net.features.

    A cheap before/after-training fingerprint: identical means after training
    indicate those parameters received no updates (e.g. they were frozen).
    """
    # Indices of the Conv2d modules inside vgg16's .features Sequential.
    conv_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]
    for i in conv_indices:
        print(net.features[i].weight.detach().numpy().mean())
    for i in conv_indices:
        print(net.features[i].bias.detach().numpy().mean())


# VGG16 weight/bias averages before training.
# The original also computed nested sums a/b/c/d (a manual mean over the 4-D
# weight tensor) that were never used — removed as dead code.
# NOTE(review): the original inspected `model`, but the freshly built network
# is bound to `model1` — confirm which object is actually trained, then make
# the two names consistent.
print_feature_param_means(model)
#training and validation for vgg and resnet
# NOTE(review): this loop trains the object named `model`, while the
# initialization code above creates `model1` — confirm both names refer to
# the same network, otherwise a different model's weights are being trained
# than the one whose averages are printed.
from sklearn.metrics import confusion_matrix

totEpochs = 5
Train_acc = []    # per-epoch training accuracy (%) from the confusion matrix
Val_acc=[]        # per-epoch validation accuracy (%) from the confusion matrix
Train_loss = []   # per-epoch average training loss
Val_loss = []     # per-epoch average validation loss
#torch.autograd.set_detect_anomaly(True)
for ep in range(totEpochs):
    # ----- Training phase -----
    epoch_loss = 0
    trainCorrect = 0
    # Running buffers for the epoch-level confusion matrix.
    predlist=torch.zeros(0,dtype=torch.long)#, device='cpu')
    lbllist=torch.zeros(0,dtype=torch.long)#, device='cpu')
    # NOTE(review): model.train() is commented out, so Dropout/BatchNorm keep
    # whatever mode the model is currently in — confirm this is intended.
    #model.train()
    with tqdm(total=len(trainset), desc=f'Epoch {ep + 1}/{totEpochs}', unit='img') as pbar:
        for batch in trainLoader:
            img = batch['image']#.to(device)
            label = batch['label']#.to(device)
            #depth = batch['depth']
            net_pred = model(img)#,depth.unsqueeze(1))
            print(label)
            print(net_pred)
            #net_pred = net_pred.type(torch.long)
            loss = criterion(net_pred, label)
            # Weight the batch loss by batch size so the sum divided by the
            # dataset size yields a per-sample average.
            epoch_loss += loss.item()*img.size(0)
            pbar.set_postfix(**{'Loss (Batch)': loss.item()})
            optimizer.zero_grad()
            loss.backward()
            # Element-wise gradient clipping to [-0.1, 0.1] before the step.
            nn.utils.clip_grad_value_(model.parameters(), 0.1)
            optimizer.step()
            pred = torch.argmax(net_pred, dim=1)
            #Accuracy calc
            trainCorrect += torch.sum(pred == label)
            #Confusion matrix
            pred = pred#.to(device)
            predlist = predlist#.to(device)
            predlist=torch.cat([predlist,pred])#.view(-1)])
            lbllist = lbllist#.to(device)
            lbllist=torch.cat([lbllist,label])#.view(-1)])
            #trainCorrect += (net_pred.argmax(1) == label).type(torch.float).sum().item()
            # Calculate training accuracy
            #pred = torch.argmax(net_pred, dim=1)
            #correct_tensor = pred.eq(label)
            # Need to convert correct tensor from int to float to average
            #accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            #train_acc += accuracy.item()# * img.size(0)
            pbar.update(img.shape[0])
        # NOTE(review): epoch_loss/len(dataset) is already a per-sample mean;
        # the extra *batchsize factor scales the reported numbers — verify.
        pbar.set_postfix(**{'Average Loss': (epoch_loss/len(trainLoader.dataset))*batchsize, 'training Accuracy': f'{(trainCorrect/len(trainLoader.dataset))*batchsize}'})
    print(predlist)
    print(lbllist)
    conf_mat_Train=confusion_matrix(lbllist.cpu().numpy(), predlist.cpu().numpy())
    print(conf_mat_Train)
    # Overall accuracy (%) = sum of the diagonal / total predictions.
    cm_accuracy=100*conf_mat_Train.diagonal().sum()/(conf_mat_Train.sum())
    print(cm_accuracy)
    Train_acc.append(cm_accuracy)
    Train_loss.append((epoch_loss/len(trainLoader.dataset))*batchsize)
    #pbar.set_postfix(**{'Average Loss': epoch_loss/len(trainset), 'training Accuracy': f'{train_acc:.2f}'})
    #pbar.set_postfix(**{'training Accuracy': f'{train_acc:.2f}%'})
    # ----- Validation phase -----
    val_loss = 0
    valCorrect = 0
    predlist=torch.zeros(0,dtype=torch.long)#, device='cpu')
    lbllist=torch.zeros(0,dtype=torch.long)#, device='cpu')
    # NOTE(review): model.eval() is commented out — same caveat as model.train().
    #model.eval()
    with tqdm(total=len(valset), desc=f'Validation', unit='img') as pbar:
        for batch in validLoader:
            # Disable autograd during validation: no backward pass is needed.
            with torch.no_grad():
                img = batch['image']#.to(device)
                label = batch['label']#.to(device)
                #depth = batch['depth']
                net_pred = model(img)#,depth.unsqueeze(1))
                #net_pred = net_pred.type(torch.long)
                loss = criterion(net_pred, label)
                val_loss += loss.item()*img.size(0)
                #valCorrect += (net_pred.argmax(1) == label).type(torch.float).sum().item()
                pbar.set_postfix(**{'Loss (validation)': loss.item()})
                #calculate validation accuracy
                pred = torch.argmax(net_pred, dim=1)
                valCorrect += torch.sum(pred == label)
                #Confusion matrix
                pred = pred#.to(device)
                predlist = predlist#.to(device)
                predlist=torch.cat([predlist,pred])#.view(-1)])
                lbllist = lbllist#.to(device)
                lbllist=torch.cat([lbllist,label])#.view(-1)])
                #correct_tensor = pred.eq(label)
                #accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
                # Multiply average accuracy times the number of examples
                #valid_acc += accuracy.item()# * img.size(0)
                pbar.update(img.shape[0])
        # NOTE(review): reconstructed placement — step the LR scheduler once
        # per epoch on the mean validation loss (ReduceLROnPlateau-style
        # argument). Confirm this was not meant to run per batch.
        scheduler.step(val_loss / len(validLoader.dataset))
        pbar.set_postfix(**{'Average Loss': val_loss/len(validLoader.dataset), 'validation Accuracy': f'{valCorrect/len(validLoader.dataset)}'})
    print(predlist)
    print(lbllist)
    conf_mat_Val=confusion_matrix(lbllist.cpu().numpy(), predlist.cpu().numpy())
    print(conf_mat_Val)
    cm_accuracy=100*conf_mat_Val.diagonal().sum()/(conf_mat_Val.sum())
    print(cm_accuracy)
    Val_acc.append(cm_accuracy)
    Val_loss.append(val_loss/len(validLoader.dataset))
    #pbar.set_postfix(**{'Average Loss': val_loss / len(valset), 'Validation Accuracy': f'{valid_acc:.2f}'})
    #pbar.set_postfix(**{'Validation Accuracy': f'{valid_acc:.2f}%'})
    #print(f'\t\tTraining Accuracy: {train_acc:.2f}%\t Validation Accuracy: {valid_acc:.2f}%')
# VGG16 weight/bias averages after training.
# Compare with the pre-training printout: unchanged means show that the
# .features parameters received no gradient updates (e.g. they were frozen).
# The original also computed nested sums a/b/c/d (a manual mean over the 4-D
# weight tensor) that were never used — removed as dead code, and the
# duplicated index list is declared once.
# NOTE(review): `model` is inspected here while the network built earlier is
# `model1` — confirm they refer to the same object.
conv_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]  # Conv2d slots in vgg16.features
for i in conv_indices:
    print(model.features[i].weight.detach().numpy().mean())
for i in conv_indices:
    print(model.features[i].bias.detach().numpy().mean())
Based on your code, it seems you are:

- freezing the entire model first,
- replacing the `.classifier` or `.fc` (depending on the model) with a new trainable custom module,
- checking the weight average of the `.features` (which were frozen) before and after the training.

In this case, the `.features` are expected to show the same values, since they have been frozen.