I am trying to train a convolutional neural network to classify cat and dog images, but it throws an error that I can't seem to solve.
Here is the whole error:
File "train.py", line 70, in <module>
main(config, args.resume)
File "train.py", line 44, in main
trainer.train()
File "...\Coding\ConvNet\base\base_trainer.py", line 85, in train
result = self._train_epoch(epoch)
File "...Coding\ConvNet\trainer\trainer.py", line 52, in _train_epoch
loss = self.loss(output, target)
File "...\Coding\ConvNet\model\loss.py", line 4, in ces_loss
return nn.CrossEntropyLoss(output,target)
File "...\Anaconda3\envs\ConvNet\lib\site-packages\torch\nn\modules\loss.py", line 898, in __init__
super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
File "...\Anaconda3\envs\ConvNet\lib\site-packages\torch\nn\modules\loss.py", line 23, in __init__
super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
File "...\Anaconda3\envs\ConvNet\lib\site-packages\torch\nn\modules\loss.py", line 16, in __init__
self.reduction = _Reduction.legacy_get_string(size_average, reduce)
File "...\Anaconda3\envs\ConvNet\lib\site-packages\torch\nn\_reduction.py", line 42, in legacy_get_string
if size_average and reduce:
RuntimeError: bool value of Tensor with more than one value is ambiguous
I am relatively new to PyTorch, so I am using the project layout from the GitHub repo listed below. I changed the train loader so that it uses ImageFolder.
From what I've seen previously, I assumed that I needed to add a check such as:
if not None
but that did not help.
Here is my DataLoader Script:
from torchvision import datasets, transforms
from base import BaseDataLoader
class CatDogDataLoader(BaseDataLoader):
    """
    Cat/dog image loader built on BaseDataLoader and torchvision's ImageFolder.

    Every image goes through the same pipeline: resize to a square of
    ``IMG_SIZE`` x ``IMG_SIZE``, convert to a single grayscale channel, then
    convert to a tensor.

    :param data_dir: Directory handed to ``datasets.ImageFolder``.
    :param batch_size: Forwarded unchanged to BaseDataLoader.
    :param shuffle: Forwarded unchanged to BaseDataLoader.
    :param validation_split: Forwarded unchanged to BaseDataLoader.
    :param num_workers: Forwarded unchanged to BaseDataLoader.
    :param training: Accepted but not read anywhere in this class.
    :param IMG_SIZE: Edge length used for the resize step (default 50).
    """
    def __init__(self, data_dir, batch_size, shuffle, validation_split, num_workers, training=True, IMG_SIZE=50):
        self.data_dir = data_dir
        self.IMG_SIZE = IMG_SIZE

        # Square target size: the same edge length for height and width.
        target_size = (self.IMG_SIZE, self.IMG_SIZE)
        preprocessing_steps = [
            transforms.Resize(target_size),
            transforms.Grayscale(1),
            transforms.ToTensor(),
        ]
        pipeline = transforms.Compose(preprocessing_steps)

        self.dataset = datasets.ImageFolder(self.data_dir, transform=pipeline)
        super().__init__(self.dataset, batch_size, shuffle, validation_split, num_workers)
And here is my Trainer:
import numpy as np
import torch
from torchvision.utils import make_grid
from base import BaseTrainer
class Trainer(BaseTrainer):
    """
    Per-epoch training and validation loops layered on BaseTrainer.

    Note:
        Attributes read here but never assigned in this class (``model``,
        ``loss``, ``metrics``, ``optimizer``, ``device``, ``writer``,
        ``logger``, ``verbosity``) are expected to be provided by
        BaseTrainer -- verify against that class.
    """
    def __init__(self, model, loss, metrics, optimizer, resume, config,
                 data_loader, valid_data_loader=None, lr_scheduler=None, train_logger=None):
        super().__init__(model, loss, metrics, optimizer, resume, config, train_logger)
        self.config = config
        self.lr_scheduler = lr_scheduler
        self.data_loader = data_loader
        self.valid_data_loader = valid_data_loader
        # Validation runs only when a validation loader was supplied.
        self.do_validation = self.valid_data_loader is not None
        # Progress is logged every int(sqrt(batch_size)) batches.
        self.log_step = int(np.sqrt(data_loader.batch_size))

    def _eval_metrics(self, output, target):
        """
        Evaluate every configured metric on one batch.

        Each value is also written to ``self.writer`` under the metric
        function's ``__name__``.

        :param output: Model output for the batch.
        :param target: Ground-truth targets for the batch.
        :return: NumPy array with one entry per metric, in ``self.metrics`` order.
        """
        scores = np.zeros(len(self.metrics))
        for position, metric_fn in enumerate(self.metrics):
            scores[position] += metric_fn(output, target)
            self.writer.add_scalar(f'{metric_fn.__name__}', scores[position])
        return scores

    def _train_epoch(self, epoch):
        """
        Run one full pass over the training loader.

        :param epoch: Current training epoch.
        :return: Dict with the mean batch loss under 'loss' and the mean
            per-batch metric values under 'metrics'; when validation is
            enabled, the entries returned by ``_valid_epoch`` are merged in.

        Note:
            Extra information can be returned by merging another dict into
            the log before returning it. The metrics entry must keep the
            key 'metrics'.
        """
        self.model.train()

        n_batches = len(self.data_loader)
        running_loss = 0
        running_metrics = np.zeros(len(self.metrics))

        for step, (inputs, labels) in enumerate(self.data_loader):
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            # Standard optimisation step: reset grads, forward, backward, update.
            self.optimizer.zero_grad()
            predictions = self.model(inputs)
            batch_loss = self.loss(predictions, labels)
            batch_loss.backward()
            self.optimizer.step()

            # Global step counts batches across epochs (epochs are 1-based here).
            self.writer.set_step((epoch - 1) * n_batches + step)
            self.writer.add_scalar('loss', batch_loss.item())
            running_loss += batch_loss.item()
            running_metrics += self._eval_metrics(predictions, labels)

            # Only emit the progress line / image grid on logging steps.
            if self.verbosity < 2 or step % self.log_step != 0:
                continue

            samples_seen = step * self.data_loader.batch_size
            percent_done = 100.0 * step / n_batches
            message = 'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                epoch,
                samples_seen,
                self.data_loader.n_samples,
                percent_done,
                batch_loss.item())
            self.logger.info(message)
            image_grid = make_grid(inputs.cpu(), nrow=8, normalize=True)
            self.writer.add_image('input', image_grid)

        log = {
            'loss': running_loss / n_batches,
            'metrics': (running_metrics / n_batches).tolist(),
        }

        if self.do_validation:
            log.update(self._valid_epoch(epoch))

        if self.lr_scheduler is not None:
            self.lr_scheduler.step()

        return log

    def _valid_epoch(self, epoch):
        """
        Run one full pass over the validation loader with gradients disabled.

        :param epoch: Current training epoch (used to compute the writer step).
        :return: Dict with the mean batch loss under 'val_loss' and the mean
            per-batch metric values under 'val_metrics'.

        Note:
            The validation metrics entry must keep the key 'val_metrics'.
        """
        self.model.eval()

        n_batches = len(self.valid_data_loader)
        running_loss = 0
        running_metrics = np.zeros(len(self.metrics))

        with torch.no_grad():
            for step, (inputs, labels) in enumerate(self.valid_data_loader):
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                predictions = self.model(inputs)
                batch_loss = self.loss(predictions, labels)

                self.writer.set_step((epoch - 1) * n_batches + step, 'valid')
                self.writer.add_scalar('loss', batch_loss.item())
                running_loss += batch_loss.item()
                running_metrics += self._eval_metrics(predictions, labels)
                image_grid = make_grid(inputs.cpu(), nrow=8, normalize=True)
                self.writer.add_image('input', image_grid)

        val_log = {
            'val_loss': running_loss / n_batches,
            'val_metrics': (running_metrics / n_batches).tolist(),
        }
        return val_log
I can include some more code if needed.
Thanks.