@ptrblck
Hi, I have tried VGG16, DenseNet, ResNet, etc., and I tried changing a lot of parameters, but the validation loss doesn't decrease. I tried the optimizers Adam and SGD, the LR schedulers ReduceLROnPlateau and StepLR,
lr=[0.1, 0.001, 0.0001, 0.007, 0.0009, 0.00001], and weight_decay=0.1. My dataset is imbalanced, so I used WeightedRandomSampler, but it didn't help. I trained the model almost 8 times with different pretrained models and parameters, but the validation loss never decreased below 0.84. I have tried different numbers of epochs: 25, 50, 100.
I changed only the last classifier layer of the pretrained model. The dataset contains 8 classes in total. Please help me with this.
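For the other backbones I replaced the classification head the same way; the final layer just lives under a different attribute per model. Roughly like this (the exact variants I used differ; resnet50 and densenet121 here are just examples):

```python
import torch.nn as nn
from torchvision import models

n_classes = 8

# VGG16: the head is the last layer of the classifier Sequential
vgg = models.vgg16(pretrained=True)
vgg.classifier[6] = nn.Linear(vgg.classifier[6].in_features, n_classes)

# ResNet: the head is called fc
resnet = models.resnet50(pretrained=True)
resnet.fc = nn.Linear(resnet.fc.in_features, n_classes)

# DenseNet: the head is called classifier
densenet = models.densenet121(pretrained=True)
densenet.classifier = nn.Linear(densenet.classifier.in_features, n_classes)
```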
```python
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

# augmentations for the training set
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# transformations for the validation and test sets (no augmentation)
valid_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
train_data=datasets.ImageFolder(os.path.join(data_dir,'train'),transform=train_transforms)
valid_data=datasets.ImageFolder(os.path.join(data_dir,'valid'),transform=valid_transforms)
test_data=datasets.ImageFolder(os.path.join(data_dir,'test'),transform=valid_transforms)
# count samples per class (note: this counts train/valid/test combined;
# the weights should arguably be derived from the training set only)
sample_counts = []
for split in [train_data.imgs, test_data.imgs, valid_data.imgs]:
    for path, label in split:
        sample_counts.append(label)
class_counts = dict(pd.Series(sample_counts).value_counts())
class_sample_count = [class_counts[i] for i in sorted(class_counts)]

# inverse-frequency weight per class, then one weight per sample
weights = 1.0 / torch.tensor(class_sample_count, dtype=torch.double)
train_targets = [label for _, label in train_data.imgs]
train_samples_weight = [weights[class_id] for class_id in train_targets]
test_targets = [label for _, label in test_data.imgs]
test_samples_weight = [weights[class_id] for class_id in test_targets]
from torch.utils.data.sampler import WeightedRandomSampler

batch_size = 64
# loading the data into loaders (shuffle must stay False when a sampler is given)
train_Loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size,
    sampler=WeightedRandomSampler(train_samples_weight, len(train_data)),
    shuffle=False, num_workers=4)
valid_Loader = torch.utils.data.DataLoader(
    valid_data, batch_size=batch_size, shuffle=False, num_workers=4)
# note: resampling the test set changes the distribution being evaluated on;
# usually the test loader is left unweighted
test_Loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size,
    sampler=WeightedRandomSampler(test_samples_weight, len(test_data)),
    shuffle=False, num_workers=4)
```
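To check whether the sampler actually balances the batches, a quick sketch like this can be run against `train_Loader` (it just counts the labels drawn in a few batches):

```python
# rough sanity check: class frequencies drawn by the weighted sampler
from collections import Counter

counts = Counter()
for i, (_, target) in enumerate(train_Loader):
    counts.update(target.tolist())
    if i == 10:  # a few batches are enough for a rough picture
        break
print(counts)  # should be roughly uniform across the 8 classes
```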
model
```python
use_cuda = torch.cuda.is_available()

model_transfer = models.vgg16(pretrained=True)
# freeze the convolutional feature extractor
for param in model_transfer.features.parameters():
    param.requires_grad = False

n_inputs = model_transfer.classifier[6].in_features
n_classes = len(train_data.classes)
# defining the last layer
last_layer = nn.Linear(n_inputs, n_classes)
# replacing the classification layer
model_transfer.classifier[6] = last_layer
if use_cuda:
    model_transfer = model_transfer.cuda()

from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optim.SGD(model_transfer.parameters(), lr=0.0001,
                               momentum=0.9, weight_decay=0.01, nesterov=True)
# scheduler = StepLR(optimizer_transfer, step_size=5, gamma=0.1)
# mode must be 'min' when stepping on a loss ('max' would treat a rising loss as improvement)
scheduler = ReduceLROnPlateau(optimizer_transfer, mode='min', factor=0.7, patience=3,
                              verbose=True)
```
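For reference, a class-weighted loss would be an alternative (or addition) to the weighted sampler; assuming the `weights` tensor computed above, a sketch:

```python
# per-class weights in the loss instead of resampling the data
class_weights = weights.float()
if use_cuda:
    class_weights = class_weights.cuda()
criterion_transfer = nn.CrossEntropyLoss(weight=class_weights)
```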
train
```python
def train(n_epochs, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer,
          use_cuda, save_path):
    valid_loss_min = np.inf
    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        correct = 0.0
        # switch back to train mode (eval() below would otherwise stick from the previous epoch)
        model_transfer.train()
        for batch_idx, (data, target) in enumerate(loaders_transfer['train']):
            # move tensors to GPU if CUDA is available
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            # clear the gradients of all optimized variables
            optimizer_transfer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model_transfer(data)
            # calculate the batch loss
            loss = criterion_transfer(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer_transfer.step()
            # update running average of the training loss
            train_loss += (1 / (batch_idx + 1)) * (loss.item() - train_loss)
            _, pred = torch.max(output, 1)
            correct += torch.sum(pred.eq(target.view_as(pred))).item()
        # validate the model
        model_transfer.eval()
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders_transfer['valid']):
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                # update running average of the validation loss
                output = model_transfer(data)
                loss = criterion_transfer(output, target)
                valid_loss += (1 / (batch_idx + 1)) * (loss.item() - valid_loss)
        # print training/validation statistics
        print('Epoch: {} Training Loss: {:.6f} Accuracy {:.6f} Validation Loss: {:.6f}'.format(
            epoch, train_loss, correct / len(train_data), valid_loss))
        # scheduler is the global ReduceLROnPlateau defined above; step it on the validation loss
        scheduler.step(valid_loss)
        # save the model whenever the validation loss improves
        if valid_loss < valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model...'.format(
                valid_loss_min, valid_loss))
            torch.save(model_transfer.state_dict(), save_path)
            valid_loss_min = valid_loss
    return model_transfer
loaders_transfer = {'train': train_Loader,
                    'valid': valid_Loader,
                    'test': test_Loader}

model_transfer = train(25, loaders_transfer, model_transfer, optimizer_transfer,
                       criterion_transfer, use_cuda, 'model_transfer.pt')
```
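After training, evaluation on the held-out set could look roughly like this (a sketch reusing the loaders defined above and the best saved checkpoint):

```python
# rough test-set evaluation with the best saved weights
model_transfer.load_state_dict(torch.load('model_transfer.pt'))
model_transfer.eval()
correct, total = 0, 0
with torch.no_grad():
    for data, target in loaders_transfer['test']:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        pred = model_transfer(data).argmax(dim=1)
        correct += (pred == target).sum().item()
        total += target.size(0)
print('Test accuracy: {:.4f}'.format(correct / total))
```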
loss
```
Epoch: 1 Training Loss: 0.324256 Accuracy 0.903248 Validation Loss: 0.847355
Validation loss decreased (inf --> 0.847355). Saving model...
Epoch: 2 Training Loss: 0.321892 Accuracy 0.901493 Validation Loss: 0.841263
Validation loss decreased (0.847355 --> 0.841263). Saving model...
Epoch: 3 Training Loss: 0.310799 Accuracy 0.908692 Validation Loss: 0.853691
Epoch: 4 Training Loss: 0.307710 Accuracy 0.905004 Validation Loss: 0.845387
Epoch: 5 Training Loss: 0.316569 Accuracy 0.899561 Validation Loss: 0.836307
Validation loss decreased (0.841263 --> 0.836307). Saving model...
Epoch: 6 Training Loss: 0.296088 Accuracy 0.917120 Validation Loss: 0.845122
Epoch: 7 Training Loss: 0.298336 Accuracy 0.908692 Validation Loss: 0.848735
Epoch 7: reducing learning rate of group 0 to 7.0000e-05.
Epoch: 8 Training Loss: 0.304659 Accuracy 0.909745 Validation Loss: 0.843582
Epoch: 9 Training Loss: 0.296660 Accuracy 0.915716 Validation Loss: 0.847272
Epoch: 10 Training Loss: 0.307698 Accuracy 0.907463 Validation Loss: 0.846216
Epoch: 11 Training Loss: 0.308325 Accuracy 0.907287 Validation Loss: 0.839601
Epoch 11: reducing learning rate of group 0 to 4.9000e-05.
Epoch: 12 Training Loss: 0.304152 Accuracy 0.905180 Validation Loss: 0.843276
Epoch: 13 Training Loss: 0.293480 Accuracy 0.914662 Validation Loss: 0.847402
Epoch: 14 Training Loss: 0.304286 Accuracy 0.906409 Validation Loss: 0.846597
Epoch: 15 Training Loss: 0.301022 Accuracy 0.911677 Validation Loss: 0.842252
```
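In case it helps with diagnosing: training accuracy is around 0.90 while the validation loss sits near 0.84, so checking per-class validation accuracy might show whether a few under-represented classes account for the plateau. A sketch, assuming the model and loaders above:

```python
# per-class validation accuracy
class_correct = torch.zeros(len(valid_data.classes))
class_total = torch.zeros(len(valid_data.classes))
model_transfer.eval()
with torch.no_grad():
    for data, target in valid_Loader:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        pred = model_transfer(data).argmax(dim=1)
        for t, p in zip(target.cpu(), pred.cpu()):
            class_total[t] += 1
            class_correct[t] += (p == t).item()
for i, name in enumerate(valid_data.classes):
    print('{}: {:.3f} ({} samples)'.format(
        name, (class_correct[i] / class_total[i]).item(), int(class_total[i])))
```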