I’m trying to finetune a resnet18 on cifar10, everyhting is straight foward yet for some weird reason I’m getting :
**RuntimeError** : element 0 of tensors does not require grad and does not have a grad_fn
I have seen a similar question here but I cant understand why I’m getting this error as im not doing anything like that here!
Can Anyone please tell me what I’m missing ?
Here is the full stack trace :
Files already downloaded and verified
Files already downloaded and verified
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
in
27 dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size = batch_size, shuffle=False, num_workers = 2)
28
---> 29 train_info, val_info = training(resnet18, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)
in training(model, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)
56
57 optimizer.zero_grad()
---> 58 loss.backward()
59 optimizer.step()
60
~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
105 products. Defaults to ``False``.
106 """
--> 107 torch.autograd.backward(self, gradient, retain_graph, create_graph)
108
109 def register_hook(self, hook):
~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
91 Variable._execution_engine.run_backward(
92 tensors, grad_tensors, retain_graph, create_graph,
---> 93 allow_unreachable=True) # allow_unreachable flag
94
95
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
And this my code :
import torch
from torchvision import models, datasets
import torch.nn as nn
resnet18 = models.resnet18(pretrained=True)
resnet18.fc = nn.Linear(512, 10)
for param in resnet18.parameters():
param.requires_grad = False
# and for fc
for param in resnet18.fc.parameters():
param.requires_grad = False
#%%
# lets create a training and testing functions for our case
def validation(model, dataloader_test, criterion, k, device):
loss_total = 0.0
acc_perbatch = 0.0
with torch.no_grad():
for imgs, labels in dataloader_test:
imgs, labels = imgs.to(device), labels.to(device)
# actiate evaluation mode
model.eval()
preds = model(imgs)
loss_val = criterion(preds, labels)
_, indexes = preds.topk(k, dim=1)
results = (indexes.view(*labels.shape) == labels).float()
acc_perbatch += torch.mean(results)
loss_total += loss_val.item()
acc = acc_perbatch/len(dataloader_test)
loss_final = loss_total/len(dataloader_test)
return loss_final, acc
def training(model, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device):
model = model.to(device)
# activate trainig mode
model.train()
training_acc_losses = []
val_acc_losses = []
trainig_batch_count = len(dataloader_train)
test_batch_count = len(dataloader_test)
for e in range(epochs):
acc_per_batch = 0.0
training_loss = 0.0
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
preds = model(imgs)
loss = criterion(preds, labels)
# calculate training accuracy
_, class_indexes= preds.topk(k=1, dim=1)
results = (class_indexes.view(*labels.shape) == labels).float()
acc_per_batch += torch.mean(results)
training_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
if i % interval == 0 :
print(f'(epoch/iter): {e}/{i} train-loss: {loss.item():.6f} train-acc: {acc_per_batch/trainig_batch_count:.4f} ')
# accumulate accuracies and losses per epoch
training_acc_losses.append( (acc_per_batch/ trainig_batch_count, training_loss/ trainig_batch_count))
# run validation test at every epoch!
val_acc_loss = validation(model, dataloader_test, criterion, k=1, device=device)
val_acc_losses.append(val_acc_loss)
print(f'val_loss : {val_acc_loss[0]:.4f} val_acc: {val_acc_loss[1]:.4f}')
return training_acc_losses, val_acc_losses
#%%
epochs = 10
k = 1
batch_size = 32
interval = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet18.parameters(), lr = 0.00001)
transformations_train = transforms.Compose([transforms.Resize(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225))
])
transformations_test = transforms.Compose([transforms.Resize(224),
transforms.ToTensor(),
transforms.Normalize(mean=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225))
])
dataset_train = datasets.CIFAR10('CIFAR10', train=True, transform = transformations_train, download=True)
dataset_test = datasets.CIFAR10('CIFAR10', train=False, transform = transformations_test, download=True)
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size = batch_size, shuffle=True, num_workers = 2)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size = batch_size, shuffle=False, num_workers = 2)
train_info, val_info = training(resnet18, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)
Thank you all in advance