Torch scripting not working

Bryan_Wang · July 6, 2020, 9:12pm

Here is my model definition:

class ResNetTC8(nn.Module):
    """Three-stage residual temporal-convolution classifier.

    The network applies a stem convolution followed by three residual
    stages. Each stage consists of a stride-2 convolution, a stride-1
    convolution, and a stride-2 1x1 skip convolution whose output is added
    to the main branch. Every convolution is followed by ReLU and then
    batch normalisation (in that order). The result is average-pooled over
    the time axis, passed through dropout, and projected to class logits.

    Args:
        n_classes: Number of output logits produced by the final linear layer.
        n_channels: Sequence of four channel widths: the stem output and the
            output width of each of the three residual stages.
        n_mfcc: Number of input channels expected by the stem convolution.
        n_frames: Length of the time axis. The stride-2 convolutions are
            padded so that this length is preserved through every stage, and
            the average pool spans exactly this many frames. Defaults to 101.
    """

    def __init__(self, n_classes, n_channels, n_mfcc, n_frames=101):
        super().__init__()
        conv_size = (9, 1)
        # Padding that keeps the time axis at n_frames for each conv flavour:
        #   stride 1, kernel 9 -> pad 4
        #   stride 2, kernel 9 -> pad (n_frames + 8) // 2
        #   stride 2, kernel 1 -> pad n_frames // 2
        same_pad = (9 // 2, 0)
        strided_pad = ((n_frames + 8) // 2, 0)
        skip_pad = (n_frames // 2, 0)

        # Stem.
        self.conv0 = nn.Conv2d(n_mfcc, n_channels[0], (3, 1), padding=(1, 0), bias=False)

        # Stage 1: n_channels[0] -> n_channels[1].
        self.conv1 = nn.Conv2d(n_channels[0], n_channels[1], conv_size, padding=strided_pad, bias=False, stride=2)
        self.conv2 = nn.Conv2d(n_channels[1], n_channels[1], conv_size, padding=same_pad, bias=False)
        self.skip_conv1 = nn.Conv2d(n_channels[0], n_channels[1], 1, padding=skip_pad, bias=False, stride=2)
        self.bn1 = nn.BatchNorm2d(n_channels[1])
        self.bn2 = nn.BatchNorm2d(n_channels[1])
        self.skip_bn1 = nn.BatchNorm2d(n_channels[1])

        # Stage 2: n_channels[1] -> n_channels[2].
        self.conv3 = nn.Conv2d(n_channels[1], n_channels[2], conv_size, padding=strided_pad, bias=False, stride=2)
        self.conv4 = nn.Conv2d(n_channels[2], n_channels[2], conv_size, padding=same_pad, bias=False)
        self.skip_conv2 = nn.Conv2d(n_channels[1], n_channels[2], 1, padding=skip_pad, bias=False, stride=2)
        self.bn3 = nn.BatchNorm2d(n_channels[2])
        self.bn4 = nn.BatchNorm2d(n_channels[2])
        self.skip_bn2 = nn.BatchNorm2d(n_channels[2])

        # Stage 3: n_channels[2] -> n_channels[3].
        self.conv5 = nn.Conv2d(n_channels[2], n_channels[3], conv_size, padding=strided_pad, bias=False, stride=2)
        # conv6 consumes the output of conv5, so its input width is
        # n_channels[3] (mirrors conv2 and conv4 in the earlier stages).
        self.conv6 = nn.Conv2d(n_channels[3], n_channels[3], conv_size, padding=same_pad, bias=False)
        self.skip_conv3 = nn.Conv2d(n_channels[2], n_channels[3], 1, padding=skip_pad, bias=False, stride=2)
        self.bn5 = nn.BatchNorm2d(n_channels[3])
        self.bn6 = nn.BatchNorm2d(n_channels[3])
        self.skip_bn3 = nn.BatchNorm2d(n_channels[3])

        # Head.
        self.avg = nn.AvgPool2d((n_frames, 1))
        self.dropout = nn.Dropout()
        self.output = nn.Linear(n_channels[3], n_classes)

    def forward(self, x):
        """Compute class logits for a batch of inputs.

        Args:
            x: 3-D input tensor. Assumed to be laid out as
                (batch, n_frames, n_mfcc) -- TODO confirm against the caller.

        Returns:
            Tensor of shape (batch, n_classes).
        """
        # NOTE(review): reshape reinterprets memory; it does not swap axes.
        # If x really is (batch, n_frames, n_mfcc), a permute may have been
        # intended here -- confirm before changing, since trained weights
        # depend on the current behaviour.
        x = x.reshape([-1, x.shape[2], x.shape[1], 1])
        x = self.conv0(x)

        y0 = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(y0))) + self.skip_bn1(F.relu(self.skip_conv1(x)))

        y1 = self.bn3(F.relu(self.conv3(x)))
        x = self.bn4(F.relu(self.conv4(y1))) + self.skip_bn2(F.relu(self.skip_conv2(x)))

        y2 = self.bn5(F.relu(self.conv5(x)))
        # Main branch of stage 3 continues from y2 (not y1).
        x = self.bn6(F.relu(self.conv6(y2))) + self.skip_bn3(F.relu(self.skip_conv3(x)))

        # flatten(1) keeps the batch dimension even when the batch size is 1,
        # which a bare squeeze() would drop.
        x = self.dropout(self.avg(x)).flatten(1)
        return self.output(x)

The model trains without error, and scripting it in Python raises no errors. However, running the exported model with LibTorch 1.5 in Xcode produces the following error:

The following operation failed in the TorchScript interpreter.
Traceback of TorchScript, serialized code (most recent call last):
  File "code/__torch__/torch/nn/modules/module/___torch_mangle_67.py", line 55, in forward
    input = torch.reshape(x, [-1, int(_22), int(_23), 1])
    _24 = (_21).forward(input, )
    input0 = torch.relu((_20).forward(_24, ))
                         ~~~~~~~~~~~~ <--- HERE
    _25 = (_18).forward((_19).forward(input0, ), )
    input1 = torch.relu(_25)
  File "code/__torch__/torch/nn/modules/module/___torch_mangle_46.py", line 8, in forward
  def forward(self: __torch__.torch.nn.modules.module.___torch_mangle_46.Module,
    argument_1: Tensor) -> Tensor:
    input = torch._convolution(argument_1, self.weight, None, [2, 2], [54, 0], [1, 1], False, [0, 0], 1, False, False, True)
            ~~~~~~~~~~~~~~~~~~ <--- HERE
    return input

It seems to be failing at the second convolution layer, but I have no idea why.