Predict Script for Image classification

Hello, I’m trying to make a predict script based on the test one. But I keep getting error.
This is the Code for the test:
Engine:

def test(self,val_loader, model, criterion):
        top1 = AverageMeter()
        top5 = AverageMeter()
        print_freq = 100
        # switch to evaluate mode
        model.eval()
        with torch.no_grad():
            for i, (input, target) in enumerate(val_loader):
                target = target.cuda()
                input = input.cuda()
                # forward
                attention_maps, _, output1 = model(input)
                refined_input = mask2bbox(attention_maps, input)
                _, _, output2 = model(refined_input)
                output = (F.softmax(output1, dim=-1)+F.softmax(output2, dim=-1))/2
                # measure accuracy and record loss
                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                loss = criterion(output, target)
                top1.update(prec1[0], input.size(0))
                top5.update(prec5[0], input.size(0))

                if i % print_freq == 0:
                    print('Test: [{0}/{1}]\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                        'Loss {loss.val:.3f} ({loss.avg:.3f})'.format(
                            i, len(val_loader),
                            top1=top1, top5=top5,loss=loss))

            print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.val:.3f}'
                .format(top1=top1, top5=top5,loss=loss))

        return top1.avg, top5.avg
def test():
    ##
    engine = Engine()
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    # define dataset
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(
        data_config['val'], data_config['val_root'], transform=transform_test)
    val_loader = DataLoader(
        val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True)
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True, aux_logits=False)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True)

    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(
        in_features=in_features, out_features=val_dataset.num_classes)
    net.fc_new = new_linear

    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
    gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and len(gpu_ids) > 1:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    net = torch.nn.DataParallel(net)
    #checkpoint_path = os.path.join(config.checkpoint_path,'model_best.pth.tar')
    net.load_state_dict(torch.load(config.checkpoint_path)['state_dict'])

    # define loss
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    prec1, prec5 = engine.test(val_loader, net, criterion)

my codes for predict:
Engine

def predict(self,val_loader, model, criterion):
        
        top1 = AverageMeter()
        top5 = AverageMeter()
        print_freq = 100
        # switch to evaluate mode
        model.eval()
        with torch.no_grad():
                
                image = Image.open("/content/drive/My Drive/volkswagen-beetle-2017-1280-01-1536873547.jpg").convert('RGB')
                
                rgb_mean = np.mean(image, axis=(0, 1))
                rgb_std = np.std(image, axis=(0, 1))
                # forward
                attention_maps, _, output1 = model(image)
                refined_input = mask2bbox(attention_maps, image)
                _, _, output2 = model(refined_input)
                output = (F.softmax(output1, dim=-1)+F.softmax(output2, dim=-1))/2
                # measure accuracy and record loss
                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                loss = criterion(output, target)
                top1.update(prec1[0], input.size(0))
                top5.update(prec5[0], input.size(0))

                if i % print_freq == 0:
                    print('Test: [{0}/{1}]\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                        'Loss {loss.val:.3f} ({loss.avg:.3f})'.format(
                            i, len(val_loader),
                            top1=top1, top5=top5,loss=loss))

            

        return output
def predict():
    ##
    engine = Engine()
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    # define dataset
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(
        data_config['val'], data_config['val_root'], transform=transform_test)
    val_loader = DataLoader(
        val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True)
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True, aux_logits=False)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True)

    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(
        in_features=in_features, out_features=val_dataset.num_classes)
    net.fc_new = new_linear

    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
    gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and len(gpu_ids) > 1:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    #checkpoint_path = os.path.join(config.checkpoint_path,'model_best.pth.tar')
    net = torch.nn.DataParallel(net)
    net.load_state_dict(torch.load(config.checkpoint_path)['state_dict'])
    
    # define loss
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    output = engine.predict(val_loader, net, criterion)
    arr = numpy.array(lst) 
    idx = (-arr).argsort()[:5]
    print(idx)

I got this error:

 predict()
  File "train_bap.py", line 251, in predict
    output = engine.predict(val_loader, net, criterion)
  File "/content/drive/My Drive/WS_DAN_PyTorch-master/utils/engine.py", line 200, in predict
    attention_maps, _, output1 = model(image)
  File "/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 153, in forward
    return self.module(*inputs[0], **kwargs[0])
  File "/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/drive/My Drive/WS_DAN_PyTorch-master/model/inception_bap.py", line 152, in forward
    x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
TypeError: 'Image' object is not subscriptable

You are passing the PIL.Image directly to the model as the input, which won’t work, since a tensor is expected:

image = Image.open("/content/drive/My Drive/volkswagen-beetle-2017-1280-01-1536873547.jpg").convert('RGB')
                
rgb_mean = np.mean(image, axis=(0, 1))
rgb_std = np.std(image, axis=(0, 1))
attention_maps, _, output1 = model(image)

You could either use the val_loader or transform the PIL.Image to a tensor using the transform_test transformation.

1 Like

Thank you for you response
I tried this now

from PIL import Image
def image_loader(loader, image_name):
    image = Image.open(image_name)
    image = loader(image).float()
    image = torch.tensor(image, requires_grad=True)
    image = image.unsqueeze(0)
    return image

def predict():
    ##
    engine = Engine()
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(
        data_config['val'], data_config['val_root'], transform=transform_test)
    val_loader = DataLoader(
        val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True)
    # define dataset
    
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True, aux_logits=False)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True)

    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(
        in_features=in_features, out_features=val_dataset.num_classes)
    net.fc_new = new_linear

    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
    gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and len(gpu_ids) > 1:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    #checkpoint_path = os.path.join(config.checkpoint_path,'model_best.pth.tar')
    net = torch.nn.DataParallel(net)
    net.load_state_dict(torch.load(config.checkpoint_path)['state_dict'])

    # define loss
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    output=list(net(image_loader(transform_test, "/content/drive/My Drive/volkswagen-beetle-2017-1280-01-1536873547.jpg")))
    #output=np.asarray(output)
    arr = np.array(output) 
    idx = np.argmax(arr)
    print(idx)

but I got this error :

image = torch.tensor(image, requires_grad=True)
Traceback (most recent call last):
  File "train_bap.py", line 272, in <module>
    predict()
  File "train_bap.py", line 262, in predict
    idx = np.argmax(arr)
  File "<__array_function__ internals>", line 6, in argmax
  File "/usr/local/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 1188, in argmax
    return _wrapfunc(a, 'argmax', axis=axis, out=out)
  File "/usr/local/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
  File "/usr/local/lib/python3.6/site-packages/torch/tensor.py", line 28, in wrapped
    return f(*args, **kwargs)
RuntimeError: The size of tensor a (24576) must match the size of tensor b (12) at non-singleton dimension 3```
I can't figure out how to solve it.

This error seems to be raised by numpy in

idx = np.argmax(arr)

Could you post the shape and dtype of arr?

(3,) object
I have 9170 class in this dataset. It’s odd

output seems to be a list with tensors.
Could you check the shape of each tensor?
If they have the same shape, use torch.stack(output) to create a single tensor from the list.

torch.Size([1, 32, 12, 12])torch.Size([1, 24576])torch.Size([1, 9170])
I think that the last one is the one that contains the softmax prediction right ?
Should I apply stack to the last tensor?

I don’t know, what your model is returning, but if the last tensor contains the logits, then you should apply torch.argmax(tensor, dim=1) on it to get the predicted class (out of 9170 classes).

Yeah it seems to have a good prediction now, :slight_smile:
tensor([1018], device=‘cuda:0’, grad_fn=)
If I want to get the first 5 classes, is there a way to do it using tensors? by changing the dimension to 5
or should I convert it to numy and apply the argsort()[:5]

If you want to use only the logits of the first 5 classes, you could slice the tensor via:

preds = torch.argmax(logits[:, :5], dim=1)