Thanks @xta0, that works great. I am using your iOS sample project as a template, and have run into a big problem that I was wondering if you could help with (I can make another thread if needed).
The traced model produces very different outputs in Swift than it does in Python, and I can't figure out why. I followed the steps from your sample project.
First, I trace the model and check its output on a tensor of ones:
import torch

model = ExplicitSpeechResModel(36, 19)  # n_labels=36, n_maps=19
model.load('../output/model_1h.pt')
model.eval()

ones = torch.ones([1, 101, 40])
traced_script_module = torch.jit.trace(model, ones)
traced_script_module.save("../output/traced_model.pt")
traced_script_module(ones)
# outputs:
# tensor([[-18.7624, -30.3478, -31.0299,  -1.1888,   8.6857,  33.7217, -36.2783,
#           51.8277,  55.9391,   9.0642,   8.6428,  10.3509,   0.2688,  43.9576,
#           -7.1114, -55.3318, -16.7983, -13.5788,  -3.9336,  -1.1792,  14.3855,
#          -31.8519, 101.3712, -43.9597,  40.5726, -16.2946, -15.8538,  21.1088,
#          -31.5852, -14.2146, -14.5817,  19.9373, -21.5292,   9.4006, -45.0686,
#           21.4724]], grad_fn=<DifferentiableGraphBackward>)
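As a sanity check on the tracing step itself, here is a minimal sketch (using the objects above) comparing the in-memory traced module against the eager model:

with torch.no_grad():
    # The traced module should agree with the eager model on the same
    # input; a mismatch here would implicate tracing itself.
    print(torch.allclose(model(ones), traced_script_module(ones), atol=1e-4))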
On the iOS side, here is my TorchModule.mm (the Objective-C++ wrapper that the Swift code calls into). As you will see, the output for the same input tensor is completely different:
@implementation TorchModule {
 @protected
  torch::jit::script::Module _impl;
}

- (nullable instancetype)initWithFileAtPath:(NSString*)filePath {
  self = [super init];
  if (self) {
    try {
      // Prefer the QNNPACK engine on mobile when it is available.
      auto qengines = at::globalContext().supportedQEngines();
      if (std::find(qengines.begin(), qengines.end(), at::QEngine::QNNPACK) != qengines.end()) {
        at::globalContext().setQEngine(at::QEngine::QNNPACK);
      }
      _impl = torch::jit::load(filePath.UTF8String);
      _impl.eval();
    } catch (const std::exception& exception) {
      NSLog(@"%s", exception.what());
      return nil;
    }
  }
  return self;
}

- (NSArray<NSNumber*>*)predictImage:(void*)imageBuffer {
  try {
    // Try out a dummy input of all ones (imageBuffer is ignored for now),
    // matching the tensor used on the Python side.
    at::Tensor tensor = torch::ones({1, 101, 40});
    // Run inference with autograd disabled.
    torch::autograd::AutoGradMode guard(false);
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    auto outputTensor = _impl.forward({tensor}).toTensor();
    float* floatBuffer = outputTensor.data_ptr<float>();
    if (!floatBuffer) {
      return nil;
    }
    // Copy the 36 logits (one per label) into an NSArray.
    NSMutableArray* results = [[NSMutableArray alloc] init];
    for (int i = 0; i < 36; i++) {
      [results addObject:@(floatBuffer[i])];
    }
    return [results copy];
  } catch (const std::exception& exception) {
    NSLog(@"%s", exception.what());
  }
  return nil;
}

@end
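For comparison, the closest Python equivalent of this inference path (reload the serialized file, eval mode, autograd off) would be something like the sketch below; if this still matched the eager output, the mismatch would point at the mobile runtime or the bundled .pt file rather than at tracing or serialization:

import torch

# Sketch: mimic the mobile inference conditions in Python by reloading
# the serialized module from disk and running it with autograd disabled.
reloaded = torch.jit.load("../output/traced_model.pt")
reloaded.eval()
with torch.no_grad():
    out = reloaded(torch.ones([1, 101, 40]))
print(out)  # should match the tensor printed in the Python section above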
Back on the iOS side, calling predictImage results in the following vector:
[11.918683, -21.391111, -18.756794, -15.70252, -14.593732, 28.798603, -22.37965, 10.117706, 5.1135015, -8.376111, -25.258512, -7.270096, 0.9224758, 3.4262152, 28.566887, 2.90841, 25.247177, 35.124638, 14.7190695, -37.291008, -4.821145, 33.09956, 47.47553, 11.395653, 9.54897, -5.713372, -32.897644, -18.26301, -5.596691, -18.339537, -25.02614, -23.303043, -3.3603168, 31.69397, 3.0528922, 7.3663263]
It is totally different from the one in Python. The model weights seem to be the same, and I'm calling model.eval(), so I'm not sure what else is missing (a way to double-check the weights is sketched after the architecture below). Here is my model architecture in case it is helpful:
import torch
import torch.nn as nn
import torch.nn.functional as F

# SerializableModule is my own nn.Module subclass that adds the
# load()/save() helpers used above.
class ExplicitSpeechResModel(SerializableModule):
    def __init__(self, n_labels, n_maps):
        super().__init__()
        self.conv0 = nn.Conv2d(1, n_maps, (3, 3), padding=(1, 1), bias=False)
        # Thirteen dilated convolutions, written out explicitly for tracing;
        # padding and dilation follow 2**(k // 3), doubling every three layers.
        self.conv1 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(0 // 3)), dilation=int(2**(0 // 3)), bias=False)
        self.bn1 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv2 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(1 // 3)), dilation=int(2**(1 // 3)), bias=False)
        self.bn2 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv3 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(2 // 3)), dilation=int(2**(2 // 3)), bias=False)
        self.bn3 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv4 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(3 // 3)), dilation=int(2**(3 // 3)), bias=False)
        self.bn4 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv5 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(4 // 3)), dilation=int(2**(4 // 3)), bias=False)
        self.bn5 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv6 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(5 // 3)), dilation=int(2**(5 // 3)), bias=False)
        self.bn6 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv7 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(6 // 3)), dilation=int(2**(6 // 3)), bias=False)
        self.bn7 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv8 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(7 // 3)), dilation=int(2**(7 // 3)), bias=False)
        self.bn8 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv9 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(8 // 3)), dilation=int(2**(8 // 3)), bias=False)
        self.bn9 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv10 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(9 // 3)), dilation=int(2**(9 // 3)), bias=False)
        self.bn10 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv11 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(10 // 3)), dilation=int(2**(10 // 3)), bias=False)
        self.bn11 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv12 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(11 // 3)), dilation=int(2**(11 // 3)), bias=False)
        self.bn12 = nn.BatchNorm2d(n_maps, affine=False)
        self.conv13 = nn.Conv2d(n_maps, n_maps, (3, 3), padding=int(2**(12 // 3)), dilation=int(2**(12 // 3)), bias=False)
        self.bn13 = nn.BatchNorm2d(n_maps, affine=False)
        self.output = nn.Linear(n_maps, n_labels)
    def forward(self, x):
        x = x.unsqueeze(1)
        y0 = F.relu(self.conv0(x))
        x = self.bn1(F.relu(self.conv1(y0)))
        y2 = F.relu(self.conv2(x)) + y0
        x = self.bn2(y2)
        x = self.bn3(F.relu(self.conv3(x)))
        y4 = F.relu(self.conv4(x)) + y2
        x = self.bn4(y4)
        x = self.bn5(F.relu(self.conv5(x)))
        y6 = F.relu(self.conv6(x)) + y4
        x = self.bn6(y6)
        x = self.bn7(F.relu(self.conv7(x)))
        y8 = F.relu(self.conv8(x)) + y6
        x = self.bn8(y8)
        x = self.bn9(F.relu(self.conv9(x)))
        y10 = F.relu(self.conv10(x)) + y8
        x = self.bn10(y10)
        x = self.bn11(F.relu(self.conv11(x)))
        y12 = F.relu(self.conv12(x)) + y10
        x = self.bn12(y12)
        x = self.bn13(F.relu(self.conv13(x)))
        x = x.view(x.size(0), x.size(1), -1)  # shape: (batch, feats, o3)
        x = torch.mean(x, 2)
        return self.output(x)
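About the weights check mentioned above: since the BatchNorm2d layers use affine=False, all of their state lives in the running_mean / running_var buffers rather than in learnable parameters, so a load that silently skipped buffers would be easy to miss. A hypothetical way to fingerprint everything the traced module actually carries (the checksum idea is mine, not from the sample project):

import torch

# Sketch: print a simple checksum for every parameter and buffer of the
# serialized module, so the values can be compared across environments.
reloaded = torch.jit.load("../output/traced_model.pt")
for name, p in reloaded.named_parameters():
    print("param ", name, float(p.sum()))
for name, b in reloaded.named_buffers():
    print("buffer", name, float(b.sum()))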
Sorry for the long post. I really appreciate your help so far, and the sample project you made as well.