Image Classification completely wrong with PyTorch Mobile iOS example

Dear all,

My trained/traced model performs well on the PC. However, when I ship model.pt to PyTorch Mobile and test it on iOS, the classification of the same image is completely wrong.
I have no idea where the problem is or how to solve it.
My model.pt was generated using transfer learning with resnet18…

Please help! thanks

Without seeing the code it’s hard to debug, but you could check:

  • your image loading and make sure all images are loaded in RGB format (which is the default in PyTorch, as it’s using PIL)
  • make sure the same preprocessing is applied (same resizing, normalization etc.)
  • call model.eval() after creating the instance and loading the state_dict.

here is the code of a Standard Transfer Learning with PyTorch:


from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Put matplotlib into interactive mode.
plt.ion()


# Per-split preprocessing pipelines:
#   'train' -> random augmentation, then tensor conversion and normalization
#   'val'   -> deterministic resize + center crop, then the same normalization
data_transforms = {}
data_transforms['train'] = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(brightness=1, contrast=1, saturation=1, hue=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
data_transforms['val'] = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Root folder that contains one 'train' and one 'val' sub-folder.
data_dir = '20200328_data/EA2020Test'

# One ImageFolder dataset per split, each with its own transform pipeline.
image_datasets = {}
for _split in ['train', 'val']:
    image_datasets[_split] = datasets.ImageFolder(
        os.path.join(data_dir, _split), data_transforms[_split])

# Batched, shuffled loaders plus the sample count of every split.
dataloaders = {}
for _split in ['train', 'val']:
    dataloaders[_split] = torch.utils.data.DataLoader(
        image_datasets[_split], batch_size=4, shuffle=True, num_workers=4)

dataset_sizes = {}
for _split in ['train', 'val']:
    dataset_sizes[_split] = len(image_datasets[_split])

# Class names in the index order used by the training dataset.
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass and then a 'val' pass over the
    module-level ``dataloaders``; tensors are moved to the module-level
    ``device`` and averages are computed with ``dataset_sizes``.

    Args:
        model: network to optimize; its mode is toggled per phase.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per training phase.
        num_epochs: number of train/val epochs to run.

    Returns:
        The same ``model`` object, with the state dict from the epoch that
        reached the highest validation accuracy loaded into it.
    """
    started_at = time.time()

    # Snapshot of the best weights seen so far (initially the starting ones).
    top_weights = copy.deepcopy(model.state_dict())
    top_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # A training pass followed by a validation pass.
        for phase in ['train', 'val']:
            is_train = (phase == 'train')
            if is_train:
                model.train()
            else:
                model.eval()

            loss_total = 0.0
            hit_total = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Clear gradients left over from the previous batch.
                optimizer.zero_grad()

                # Autograd is only switched on while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # torch.max over dim 1 yields (values, indices); the
                    # indices are the predicted class ids.
                    preds = torch.max(outputs, 1)[1]
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size to average per sample.
                loss_total += loss.item() * inputs.size(0)
                hit_total += torch.sum(preds == labels.data)

            if is_train:
                scheduler.step()

            epoch_loss = loss_total / dataset_sizes[phase]
            epoch_acc = hit_total.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Remember the weights whenever validation accuracy improves.
            if not is_train and epoch_acc > top_acc:
                top_acc = epoch_acc
                top_weights = copy.deepcopy(model.state_dict())

        print()

    elapsed = time.time() - started_at
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed // 60, elapsed % 60))
    print('Best val Acc: {:4f}'.format(top_acc))

    # Restore the best-performing weights before handing the model back.
    model.load_state_dict(top_weights)
    #torch.save(model.state_dict(), 'newtest.pth')
    return model




def visualize_model(model, num_images=6):
    """Plot validation images titled with the class predicted by ``model``.

    Batches are drawn from the module-level ``dataloaders['val']`` and moved
    to the module-level ``device``. Images are laid out in a two-column grid.
    The model is switched to eval mode for the predictions and put back into
    whatever mode it was in, even if plotting raises.

    Args:
        model: network used to predict the class of each image.
        num_images: maximum number of images to draw; nothing is drawn when
            this is zero or negative.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    plt.figure()

    # Two columns; round the row count up so an odd num_images still fits.
    rows = (num_images + 1) // 2

    # Same constants as the Normalize step in data_transforms; used to undo
    # the normalization so the picture is displayable.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))

                    # (C, H, W) tensor -> (H, W, C) array, de-normalized and
                    # clipped to the [0, 1] range expected for float images.
                    picture = inputs.cpu().data[j].numpy().transpose((1, 2, 0))
                    picture = np.clip(std * picture + mean, 0, 1)
                    ax.imshow(picture)
                    # Short pause so the figure can refresh.
                    plt.pause(0.001)

                    if images_so_far == num_images:
                        return
    finally:
        # Single restore point for every exit path.
        model.train(mode=was_training)

# Start from a pretrained ResNet-18 and freeze all of its existing weights.
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replacement classifier head is the only trainable part.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=25)

# Trace on the CPU and explicitly in eval mode, so the exported graph does
# not depend on whichever mode train_model happened to leave the model in.
model_conv = model_conv.cpu()
model_conv.eval()

example = torch.rand(1, 3, 224, 224)

traced_script_module = torch.jit.trace(model_conv, example)

traced_script_module.save("mymodel-jit-cpu.pt")

# Export the class names in the exact index order the model was trained
# with. The order comes from the dataset, not from a hand-written list, so
# the labels file used next to the exported model must follow this order.
with open("mymodel-labels.txt", "w") as labels_file:
    labels_file.write("\n".join(class_names) + "\n")


#model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=25)

#visualize_model(model_conv)

And here is the code of validation on MacOS:

# sample execution (requires torchvision)

import torch
from PIL import Image
from torchvision import datasets, models, transforms
import os

# Same per-split pipelines as the training script; only the datasets built
# from them are needed here, to recover the class-name order.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ColorJitter(brightness=1, contrast=1, saturation=1, hue=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Force three RGB channels so grayscale/RGBA files still produce the
# (3, 224, 224) tensor that Normalize and the model expect.
input_image = Image.open('./20200328_data/EA2020Test/val/Class3/frame100.jpg').convert('RGB')
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

data_dir = '20200328_data/EA2020Test'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# Class names in dataset index order; output index i maps to class_names[i].
class_names = image_datasets['train'].classes



# Load the traced model exported by the training script.
model_conv = torch.jit.load('mymodel-jit-cpu.pt')
model_conv.eval()

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    # The loaded module is named model_conv; the previous `model` was an
    # undefined name and raised NameError on CUDA machines.
    model_conv.to('cuda')

with torch.no_grad():
    output = model_conv(input_batch)
    # Index of the highest raw score along the class dimension.
    _, preds = torch.max(output, 1)
print(class_names[preds[0]])

With these codes, the single image is correctly classified. (e.g. Class3)

And here is the github of where the HelloWorld PyTorch mobile used:

I just replaced model-jit-cpu.pt and words.txt in the above example code,
but the same image (e.g. frame28.jpg) is classified as Class23…

Thanks for helping me to figure out where is the problem.

The workflow seems to be correct. Are you able to check the raw outputs of your model and compare it to your Python validation code?

thanks a lot for the hints, i will let you know the raw outputs dataset (40 tensor) later. Thanks

Here is the raw output of my model in the iOS Xcode environment,
where the classification is totally wrong: the 23rd value (7.87…) is the largest.

override func viewDidLoad() {
        super.viewDidLoad()
        let image = UIImage(named: "frame28.jpg")!
        imageView.image = image
        let resizedImage = image.resized(to: CGSize(width: 224, height: 224))
        guard var pixelBuffer = resizedImage.normalized() else {
            return
        }
        guard let outputs = module.predict(image: UnsafeMutableRawPointer(&pixelBuffer)) else {
            return
        }
        print(outputs)
        let zippedResults = zip(labels.indices, outputs)
        print(labels.indices)
        let sortedResults = zippedResults.sorted { $0.1.floatValue > $1.1.floatValue }.prefix(3)
        
        var text = ""
        for result in sortedResults {
            text += "\u{2022} \(labels[result.0]) \n\n"
        }
        resultView.text = text

result:
[0.7592794, 1.170417, 0.4990637, 4.677132, -0.9478517, -2.447758, 1.918674, 0.5767481, 1.738559, -0.2370548, 0.4759183, 2.106035, -0.5786518, -1.511588, -2.636765, -2.717191, -0.9784743, -0.7371585, -2.024575, -2.237826, -2.281566, -0.03008107, 7.870783, 1.824642, 0.3037256, -0.4274126, 0.4281527, -0.4137115, -2.859639, -2.277968, -1.029291, -0.2246231, 1.358673, 2.990178, 2.051248, -0.05761524, 2.349458, 0.004223851, -1.494618, -2.429391]

Here is the code of Python validation:

# sample execution (requires torchvision)

import torch
from PIL import Image
from torchvision import datasets, models, transforms
import os

# Same per-split pipelines as the training script; only the datasets built
# from them are needed here, to recover the class-name order.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ColorJitter(brightness=1, contrast=1, saturation=1, hue=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Force three RGB channels so grayscale/RGBA files still produce the
# (3, 224, 224) tensor that Normalize and the model expect.
input_image = Image.open('./20200328_data/EA2020Test/val/Class3/frame28.jpg').convert('RGB')
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

data_dir = '20200328_data/EA2020Test'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# Class names in dataset index order; output index i maps to class_names[i].
class_names = image_datasets['train'].classes



# Load the traced model exported by the training script.
model_conv = torch.jit.load('mymodel-jit-cpu.pt')
model_conv.eval()

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    # The loaded module is named model_conv; the previous `model` was an
    # undefined name and raised NameError on CUDA machines.
    model_conv.to('cuda')

with torch.no_grad():
    output = model_conv(input_batch)
    # Index of the highest raw score along the class dimension.
    _, preds = torch.max(output, 1)
print(class_names[preds[0]])
# Raw per-class scores, printed for comparison with the iOS output.
print(output)
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
# print(torch.nn.functional.softmax(output[0], dim=0))

And here is the output tensor:
tensor([[ 3.0214e+00, 3.8240e-01, -9.9551e-01, 3.9651e+00, 2.6704e+00,
-1.5943e+00, 9.1830e-01, -6.0747e-03, -1.3737e+00, -6.4241e-01,
5.8611e-01, 2.2428e+00, -1.2174e+00, -1.3305e-01, -2.4817e+00,
-1.5725e+00, -1.0427e+00, -2.0795e+00, -2.4415e+00, -2.3984e-01,
-2.3185e+00, -9.6899e-02, 9.8113e+00, 1.5801e-01, -8.7105e-01,
-1.9445e+00, -5.7967e-01, -2.9598e-01, -1.9398e+00, -1.6080e+00,
-7.6736e-01, -9.2793e-01, 1.9376e+00, 1.9328e+00, 1.3377e+00,
-1.8019e-01, 2.2197e+00, -7.0646e-02, -2.4584e-01, -1.6802e+00]])
And the Python classification is correct: the image is labeled as the 3rd class (Class3).
I have a feeling there might be some difference between torch.max (Python validation) and torch.exp
(Torch Mobile)?

here is the code of TorchModule.mm

#import "TorchModule.h"
#import <LibTorch/LibTorch.h>

/// Objective-C++ wrapper that loads a TorchScript module from disk and runs
/// it on a raw float image buffer.
@implementation TorchModule {
 @protected
  // TorchScript module loaded in -initWithFileAtPath: and used for inference.
  torch::jit::script::Module _impl;
}

/// Loads the TorchScript module stored at `filePath` and puts it in eval mode.
/// QNNPACK is selected as the quantized engine when the runtime reports it
/// as supported.
/// @param filePath Path of the serialized module file.
/// @return The initialized wrapper, or nil if loading throws.
- (nullable instancetype)initWithFileAtPath:(NSString*)filePath {
  self = [super init];
  if (self) {
    try {
      auto qengines = at::globalContext().supportedQEngines();
      if (std::find(qengines.begin(), qengines.end(), at::QEngine::QNNPACK) != qengines.end()) {
        at::globalContext().setQEngine(at::QEngine::QNNPACK);
      }
      _impl = torch::jit::load(filePath.UTF8String);
      _impl.eval();
    } catch (const std::exception& exception) {
      NSLog(@"%s", exception.what());
      return nil;
    }
  }
  return self;
}

/// Runs the module on `imageBuffer` and returns every output score.
/// @param imageBuffer Float data interpreted as a {1, 3, 224, 224} tensor
///        (the shape and dtype passed to from_blob below).
/// @return One NSNumber per element of the output tensor, in tensor order,
///         or nil if the buffer is NULL or inference throws.
- (NSArray<NSNumber*>*)predictImage:(void*)imageBuffer {
  // Nothing to wrap; bail out before handing a null pointer to from_blob.
  if (!imageBuffer) {
    return nil;
  }
  try {
    at::Tensor tensor = torch::from_blob(imageBuffer, {1, 3, 224, 224}, at::kFloat);
    // Inference only: disable gradient tracking for the forward pass.
    torch::autograd::AutoGradMode guard(false);
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    auto outputTensor = _impl.forward({tensor}).toTensor();
    float* floatBuffer = outputTensor.data_ptr<float>();
    if (!floatBuffer) {
      return nil;
    }
    // Take the score count from the tensor itself rather than a hard-coded
    // class count, so a model with a different number of classes is neither
    // truncated nor read past the end of the buffer.
    const int64_t scoreCount = outputTensor.numel();
    NSMutableArray<NSNumber*>* results =
        [[NSMutableArray alloc] initWithCapacity:(NSUInteger)scoreCount];
    for (int64_t i = 0; i < scoreCount; i++) {
      [results addObject:@(floatBuffer[i])];
    }
    return [results copy];
  } catch (const std::exception& exception) {
    NSLog(@"%s", exception.what());
  }
  return nil;
}

@end

Thanks a lot for your help!

@dormouse, could you try using png instead of jpg? - https://github.com/pytorch/pytorch/issues/27813

Yes, i have just tried with frame28.png, but the classification is exactly the same (far from ground truth) as jpg. :joy: :joy: :joy: :joy: :joy: :joy: :joy: :joy:

@ptrblck @xta0
I have figured out where is the problem.
I double-checked the prediction output on desktop and on iOS; torch.max gives (more or less) the same tensor values.
But for iOS, the label.txt should be generated as part of the PyTorch training process.
I manually created a label.txt, like:
Class1
Class2
Class3

which is not the case during PyTorch learning.
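During PyTorch training, the order of class_names is decided by the dataset, not by my list: ImageFolder sorts the class folder names as strings, so Class10 comes before Class2 and Class3 ends up at index 22 (the 23rd output). Compared with my manually written list, the order therefore looks shuffled. So just replacing the manually created label.txt in the Xcode project with one that follows the class_names order from training fixes it!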

Thanks for your attention.

1 Like