I have a dataset that contains images of shape (3, 512, 512).
Here are some helper functions that will be used by the CNN to classify the images.
import copy
import torch
import torch.nn as nn

# Helper function that counts the number of correct predictions per data batch
def metrics_batch(output, target):
    # Get the predicted class
    pred = output.argmax(dim=1, keepdim=True)
    # Compare the predicted class with the target class
    correct = pred.eq(target.view_as(pred)).sum().item()
    return correct
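For illustration, this is how metrics_batch behaves on a dummy batch (the shapes and values below are made up, not from my dataset):
# Dummy output: 4 samples, 3 classes (e.g. log-probabilities)
output = torch.tensor([[0.1, 0.7, 0.2],
                       [0.8, 0.1, 0.1],
                       [0.2, 0.2, 0.6],
                       [0.3, 0.4, 0.3]])
target = torch.tensor([1, 0, 2, 0])
print(metrics_batch(output, target))  # prints 3: the last prediction (class 1) misses target 0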
# Calculate the loss value per batch
def loss_batch(loss_func, output, target, opt=None):
    loss = loss_func(output, target)
    with torch.no_grad():
        metric_b = metrics_batch(output, target)
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss.item(), metric_b
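Called without an optimizer, loss_batch only evaluates; with opt it also performs the gradient step. A quick check on made-up tensors:
output = torch.log_softmax(torch.randn(4, 3), dim=1)  # fake log-probabilities
target = torch.tensor([1, 0, 2, 0])                   # class indices (Long)
loss_val, n_correct = loss_batch(nn.NLLLoss(reduction="sum"), output, target)
print(loss_val, n_correct)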
# Calculate the loss value and the performance metric of the entire dataset, i.e., an epoch
device = torch.device("cuda")
def loss_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
    running_loss = 0.0
    running_metric = 0.0
    len_data = len(dataset_dl.dataset)
    # An internal loop over the dataset
    for xb, yb in dataset_dl:
        #xb = xb.long()
        xb = xb.to(device, dtype=torch.float)
        #yb = yb.long()
        yb = yb.to(device, dtype=torch.float)
        # Get model output
        output = model(xb)
        # Get loss per batch
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
        # Update running loss and metric
        running_loss += loss_b
        if metric_b is not None:
            running_metric += metric_b
        # Break the loop in case of sanity check
        if sanity_check:
            break
    # Average loss value
    loss = running_loss / float(len_data)
    # Average metric value
    metric = running_metric / float(len_data)
    return loss, metric
def train_val(model, params):
    # Extract model parameters
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    sanity_check = params["sanity_check"]
    lr_scheduler = params["lr_scheduler"]
    path2weights = params["path2weights"]
    # History of loss values in each epoch
    loss_history = {
        "train": [],
        "val": [],
    }
    # History of metric values in each epoch
    metric_history = {
        "train": [],
        "val": [],
    }
    # A deep copy of weights for the best performing model
    best_model_wts = copy.deepcopy(model.state_dict())
    # Initialize best loss to a large value
    best_loss = float('inf')
    # Main loop
    for epoch in range(num_epochs):
        # Get current learning rate
        current_lr = get_lr(opt)
        print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr))
        # Train model on training dataset
        model.train()
        train_loss, train_metric = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
        # Collect loss and metric for training dataset
        loss_history["train"].append(train_loss)
        metric_history["train"].append(train_metric)
        # Evaluate model on validation dataset
        model.eval()
        with torch.no_grad():
            val_loss, val_metric = loss_epoch(model, loss_func, val_dl, sanity_check)
        # Store best model
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            # Store weights into a local file
            torch.save(model.state_dict(), path2weights)
            print("Copied best model weights!")
        # Collect loss and metric for validation dataset
        loss_history["val"].append(val_loss)
        metric_history["val"].append(val_metric)
        # Learning rate schedule
        lr_scheduler.step(val_loss)
        if current_lr != get_lr(opt):
            print("Loading best model weights!")
            model.load_state_dict(best_model_wts)
        print("train loss: %.6f, dev loss: %.6f, accuracy: %.2f" % (train_loss, val_loss, 100 * val_metric))
        print("-" * 10)
    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model, loss_history, metric_history
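(get_lr is not shown above; it is just the usual helper that reads the current learning rate from the optimizer's parameter groups, along the lines of:)
def get_lr(opt):
    # Return the learning rate of the first parameter group
    for param_group in opt.param_groups:
        return param_group['lr']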
However, I encounter an error when I run this part of the code:
# Training configuration (sanity_check left at False for a full run)
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

loss_func = nn.NLLLoss(reduction="sum")
opt = optim.Adam(cnn_model.parameters(), lr=3e-4)
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=25, verbose=1)
params_train = {
    "num_epochs": 5,
    "optimizer": opt,
    "loss_func": loss_func,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": False,
    "lr_scheduler": lr_scheduler,
    "path2weights": "./weights.pt",
}
cnn_model, loss_hist, metric_hist = train_val(cnn_model, params_train)
Error:
Epoch 0/4, current lr=0.0003
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-92-c403ac5cd5e9> in <module>()
16 }
17
---> 18 cnn_model, loss_hist, metric_hist = train_val(cnn_model, params_train)
5 frames
<ipython-input-91-850cb8dbad56> in train_val(model, params)
86 # train model on training dataset
87 model.train()
---> 88 train_loss, train_metric=loss_epoch(model,loss_func,train_dl,sanity_check,opt)
89
90 # collect loss and metric for training dataset
<ipython-input-91-850cb8dbad56> in loss_epoch(model, loss_func, dataset_dl, sanity_check, opt)
32 output = model(xb)
33 # Get loss per batch
---> 34 loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
35
36 # Update running loss and metric
<ipython-input-91-850cb8dbad56> in loss_batch(loss_func, output, target, opt)
8 # Caluculate the loss value per batch
9 def loss_batch(loss_func, output, target, opt = None):
---> 10 loss = loss_func(output, target)
11 with torch.no_grad():
12 metric_b = metrics_batch(output, target)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
203
204 def forward(self, input, target):
--> 205 return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
206
207
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss_forward
I have tried changing the datatype of the target with target.long(), but no luck.
I also tried changing the datatypes of xb and yb, but no luck there either.
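For reference, the same error can be reproduced in isolation, since nn.NLLLoss expects the target to be Long class indices; a float target (which is what yb becomes after the .to(device, dtype=torch.float) call above) triggers it:
import torch
import torch.nn as nn

loss_func = nn.NLLLoss(reduction="sum")
output = torch.randn(4, 3)              # dummy log-probabilities: 4 samples, 3 classes
target = torch.tensor([1., 0., 2., 0.]) # float targets, like yb above
loss_func(output, target)  # RuntimeError: Expected object of scalar type Long but got scalar type Float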
Please help!