Transfer Learning resnet18, its getting stuck!

Here’s my code

Have no idea what the error is!
It just gets stuck here

################### Used to transform the data before feeding it into the neural network ##################
data_transforms = {
        'train': transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
                transforms.Resize(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
}
############################################################################################################



if __name__ == '__main__':
	
	##### import the VGG11 pre trained model #########################################################
	#model_ft = models.vgg11(pretrained=True)
	model_ft = models.resnet18(pretrained=True)
	###### freeze all the layers in the network ######################################## 
	for param in model_ft.parameters():
		param.requires_grad = False
	#### last layer created require gradient is True by default #########################
	#model_ft.classifier._modules['6'] = nn.Linear(4096,3)

	num_ftrs = model_ft.fc.in_features
	model_ft.fc = nn.Linear(num_ftrs,3)

	model_ft = model_ft.to(device)

	criterion = nn.CrossEntropyLoss()

	optimizer_ft = optim.SGD(model_ft.fc.parameters(),lr=0.001,momentum=0.9)

	exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,step_size=6,gamma=0.1)

	model_ft = train_model(model_ft,criterion,optimizer_ft,exp_lr_scheduler,num_epochs=6)

Epoch 0/5

train Loss: 1.0191 Acc: 0.5015
Traceback (most recent call last):
File “demotest1.py”, line 218, in
model_ft = train_model(model_ft,criterion,optimizer_ft,exp_lr_scheduler,num_epochs=6)
File “demotest1.py”, line 119, in train_model
for i, (inputs,labels) in enumerate(dataloaders[phase]):
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 336, in next
return self._process_next_batch(batch)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 357, in _process_next_batch
raise batch.exc_type(batch.exc_msg)
RuntimeError: Traceback (most recent call last):
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 106, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 187, in default_collate
return [default_collate(samples) for samples in transposed]
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 187, in
return [default_collate(samples) for samples in transposed]
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 164, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 299 and 284 in dimension 3 at /pytorch/aten/src/TH/generic/THTensorMath.cpp:3616

It looks like your transformations aren’t used, as the batch contains differently shaped images.
Could you post your Dataset code?

I have already posted the data transformations on the top, here once again for your reference

data_transforms = {
        'train': transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
                transforms.Resize(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
}

I looked into this post https://github.com/marvis/pytorch-yolo2/issues/89 suggested using RandomCrop instead of Resize, which didn’t solve my issue instead it got this error

Traceback (most recent call last):
File “demotest1.py”, line 222, in
model_ft = train_model(model_ft,criterion,optimizer_ft,exp_lr_scheduler,num_epochs=2)
File “demotest1.py”, line 119, in train_model
for i, (inputs,labels) in enumerate(dataloaders[phase]):
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 336, in next
return self._process_next_batch(batch)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 357, in _process_next_batch
raise batch.exc_type(batch.exc_msg)
ValueError: Traceback (most recent call last):
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 106, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py”, line 106, in
samples = collate_fn([dataset[i] for i in batch_indices])
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torchvision/datasets/folder.py”, line 103, in getitem
sample = self.transform(sample)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torchvision/transforms/transforms.py”, line 49, in call
img = t(img)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torchvision/transforms/transforms.py”, line 421, in call
i, j, h, w = self.get_params(img, self.size)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/site-packages/torchvision/transforms/transforms.py”, line 399, in get_params
i = random.randint(0, h - th)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/random.py”, line 221, in randint
return self.randrange(a, b+1)
File “/home/ffffff/.virtualenvs/LearnPytorch/lib/python3.6/random.py”, line 199, in randrange
raise ValueError(“empty range for randrange() (%d,%d, %d)” % (istart, istop, width))
ValueError: empty range for randrange() (0,-29, -29)

However, using RandomSizeCrop solved my issue.
Wounder why this error occurs!

PS: typo it is RandomResizedCrop

I was just wondering how you actually called the transformations in the Dataset's __getitem__ method.
So using transforms.RandomResizedCrop didn’t work, but RandomSizeCrop works?

RandomResizedCrop did solve the issue.