Failed JIT Trace for Torch Mobile iOS (Size not specified in nn.interpolate?) (Segmentation)

I’m attempting to JIT trace a semantic segmentation model based on a Resnet50 dilated encoder and an accompanying PPMDeepsup decoder to use in iOS/torch mobile.

When running the following to JIT trace my segmentation module with a random image input of 224x224:

input_tensor = torch.rand(1, 3, 224, 224)
model.eval()
script_model = torch.jit.trace(model, input_tensor)

I get the the following trace. Apparently when my Forward function calls nn.interpolate() it claims that the size variable (final image segmentation size of 224x224) is not provided (found None) even though as seen in the trace below I’ve even tried hard coding the necessary tuple of (224, 224). Why am I still receiving this exception?

Trace

~/thd-visual-ai/segmentation/semantic-segmentation-pytorch/models/models.py in forward(self, conv_out, segSize)
    481         print(segSize)
    482         if self.use_softmax:  # is True during inference
--> 483             x = nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
    484             x = nn.functional.softmax(x, dim=1)
    485             return x

~/Desktop/Projects/thdEnv/lib/python3.6/site-packages/torch/nn/functional.py in interpolate(input, size, scale_factor, mode, align_corners)
   2516         raise NotImplementedError("Got 4D input, but linear mode needs 3D input")
   2517     elif input.dim() == 4 and mode == 'bilinear':
-> 2518         return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
   2519     elif input.dim() == 4 and mode == 'trilinear':
   2520         raise NotImplementedError("Got 4D input, but trilinear mode needs 5D input")

~/Desktop/Projects/thdEnv/lib/python3.6/site-packages/torch/nn/functional.py in _output_size(dim)
   2470 
   2471     def _output_size(dim):
-> 2472         _check_size_scale_factor(dim)
   2473         if size is not None:
   2474             return size

~/Desktop/Projects/thdEnv/lib/python3.6/site-packages/torch/nn/functional.py in _check_size_scale_factor(dim)
   2461     def _check_size_scale_factor(dim):
   2462         if size is None and scale_factor is None:
-> 2463             raise ValueError('either size or scale_factor should be defined')
   2464         if size is not None and scale_factor is not None:
   2465             raise ValueError('only one of size or scale_factor should be defined')

ValueError: either size or scale_factor should be defined

I’ve defined size at line 483 hardcoded to try to fix this and it’s still giving none. I will attach the model.summary() here of my semantic segmentation model to help.

Sequential(
  (0): ResnetDilated(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        )
      )
      (1): Bottleneck(
        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (2): Bottleneck(
        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
    )
    (layer2): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        )
      )
      (1): Bottleneck(
        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (2): Bottleneck(
        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (3): Bottleneck(
        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
    )
    (layer3): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        )
      )
      (1): Bottleneck(
        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (2): Bottleneck(
        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (3): Bottleneck(
        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (4): Bottleneck(
        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (5): Bottleneck(
        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(1024, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
    )
    (layer4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False)
        (bn2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(2048, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): SynchronizedBatchNorm2d(2048, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        )
      )
      (1): Bottleneck(
        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False)
        (bn2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(2048, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (2): Bottleneck(
        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False)
        (bn2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): SynchronizedBatchNorm2d(2048, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
    )
  )
  (1): PPMDeepsup(
    (ppm): ModuleList(
      (0): Sequential(
        (0): AdaptiveAvgPool2d(output_size=1)
        (1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (3): ReLU(inplace=True)
      )
      (1): Sequential(
        (0): AdaptiveAvgPool2d(output_size=2)
        (1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (3): ReLU(inplace=True)
      )
      (2): Sequential(
        (0): AdaptiveAvgPool2d(output_size=3)
        (1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (3): ReLU(inplace=True)
      )
      (3): Sequential(
        (0): AdaptiveAvgPool2d(output_size=6)
        (1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (3): ReLU(inplace=True)
      )
    )
    (cbr_deepsup): Sequential(
      (0): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (conv_last): Sequential(
      (0): Conv2d(4096, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): SynchronizedBatchNorm2d(512, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Dropout2d(p=0.1, inplace=False)
      (4): Conv2d(512, 150, kernel_size=(1, 1), stride=(1, 1))
    )
    (conv_last_deepsup): Conv2d(512, 150, kernel_size=(1, 1), stride=(1, 1))
    (dropout_deepsup): Dropout2d(p=0.1, inplace=False)
  )
  (2): NLLLoss()
)

I appreciate any help!

Can you share the model itself? A simple test around interpolate (using the nightly version of PyTorch) seems to indicate that it works

def test_interpolate(x):
    return nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)

# this is successful
x = torch.jit.trace(test_interpolate, (torch.randn(1, 3, 224, 224)))
print(x.graph)

If the bug persists, you might have better luck using torch.jit.script around the area that calls interpolate.