I have a PyTorch model that looks like this:
    from abc import ABC

    import torch
    from torch import nn


    class ResBlock(nn.Module, ABC):
        def __init__(self, in_channels: int, filters: int, conv_num: int):
            super(ResBlock, self).__init__()
            self.filters = filters
            self.conv_num = conv_num
            self.input_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=1)
            self.inner_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=3, padding=1)
            self.outer_conv = torch.nn.Conv1d(in_channels=filters, out_channels=filters, kernel_size=3, padding=1)
            self.max_pool = torch.nn.MaxPool1d(kernel_size=2, stride=2)

        def forward(self, x):
            y = x
            for i in range(self.conv_num - 1):
                if i == 0:
                    y = self.inner_conv(y)
                else:
                    y = self.outer_conv(y)
                y = torch.relu(y)
            y = self.outer_conv(y)
            s = self.input_conv(x)  # 1x1 conv on the skip connection
            y = s + y
            y = torch.relu(y)
            return self.max_pool(y)


    class Net(nn.Module, ABC):
        def __init__(self, num_of_classes: int):
            super(Net, self).__init__()
            self.block_1 = ResBlock(1, 16, 2)
            self.block_2 = ResBlock(16, 32, 2)
            self.block_3 = ResBlock(32, 64, 3)
            self.block_4 = ResBlock(64, 128, 3)
            self.block_5 = ResBlock(128, 128, 3)
            self.avg_pool = torch.nn.AvgPool1d(kernel_size=3, stride=3)
            self.flatten = torch.nn.Flatten()
            self.dense_1 = torch.nn.Linear(
                in_features=self.block_5.filters * (249 // self.avg_pool.kernel_size[0]),
                out_features=256
            )
            self.dense_2 = torch.nn.Linear(in_features=256, out_features=128)
            self.classifier = torch.nn.Linear(in_features=128, out_features=num_of_classes)

        def forward(self, x):
            x = self.block_1(x)
            x = self.block_2(x)
            x = self.block_3(x)
            x = self.block_4(x)
            x = self.block_5(x)
            x = self.avg_pool(x)
            x = self.flatten(x)
            x = self.dense_1(x)
            x = self.dense_2(x)
            x = self.classifier(x)
            return x
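For context on the dense_1 sizing: the five stride-2 max-pools followed by the stride-3 average pool shrink the time dimension by a factor of roughly 96, so 128 * (249 // 3) = 10624 input features correspond to an input of about 8000 samples. A minimal shape check (a sketch; the 8000-sample length and the 10 classes are my assumptions):

    # Sketch: confirm the flattened size matches dense_1's in_features.
    # The (batch, 1, 8000) input shape and num_of_classes=10 are assumptions.
    net = Net(num_of_classes=10)
    dummy = torch.randn(2, 1, 8000)
    print(net(dummy).shape)  # expect torch.Size([2, 10])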
I trained the model in Python, got ~65% accuracy on the test set, and wanted to transfer it to C++. So here it goes. I exported it like so (after training, of course):
    # Training code...
    jit_model = torch.jit.script(model)
    jit_model.save('torchscript-model.pt')
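Before touching C++, one cheap sanity check is to confirm that the scripted module reproduces the eager model's outputs on the same input. A minimal sketch (the input shape is assumed, as above):

    # Sketch: eager vs. scripted outputs should match before export.
    model.eval()
    example = torch.randn(4, 1, 8000)  # assumed input shape
    with torch.no_grad():
        eager_out = model(example)
        scripted_out = jit_model(example)
    print(torch.allclose(eager_out, scripted_out))  # expect True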
So, the next logical step was to import it into C++:
    #include <torch/torch.h>
    #include <torch/script.h>
    #include "constants.hh"

    int main() {
        torch::manual_seed(1);
        torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU);

        auto model = torch::jit::load("./torchscript-model.pt");
        model.to(device);

        // CsvDataset is a custom torch::data dataset defined elsewhere.
        auto test_raw_dataset = CsvDataset(constants::kTestCsv);
        auto test_dataset = test_raw_dataset.map(torch::data::transforms::Stack<>());
        auto test_data_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(
            std::move(test_dataset), torch::data::DataLoaderOptions(constants::kBatchSize));

        size_t correct_count = 0;
        for (const auto& batch : *test_data_loader) {
            auto inputs = batch.data.to(device);
            auto labels = batch.target.to(device);
            inputs = inputs.unsqueeze(1);  // (batch, samples) -> (batch, 1, samples)
            labels = labels.squeeze(1);

            // torch::jit::Module::forward takes a std::vector<torch::jit::IValue>,
            // hence the braces around the input tensor.
            auto outputs = model.forward({inputs}).toTensor();
            auto prediction = outputs.argmax(1);
            correct_count += prediction.eq(labels).sum().item<int64_t>();
        }
        // Printing metrics...
    }
Running it revealed a weird problem: accuracy on the same test set is now ~12%.
The dataset is the same, and it goes into the network in the same order as in Python, yet I get totally different results.
Both scripts run on CPU only.
Debugging
So I tried to debug it, and this is what I found:
- All of the predictions when the model is loaded into C++ are the same (always class 6).
- When the exported model is loaded back into Python (in the same way as in C++), the results are good: accuracy is the same ~65% (see the sketch after this list).
- Summing and computing predictions give the same results at every step in C++ and Python; only the outputs from the net differ.
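For reference, this is roughly how the exported model was re-checked in Python for the second point above (a sketch; test_loader is a placeholder for my CSV-backed test DataLoader):

    # Sketch: re-evaluate the exported TorchScript model in Python,
    # mirroring the C++ loop. test_loader is a placeholder name.
    jit_model = torch.jit.load('torchscript-model.pt')
    jit_model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.unsqueeze(1)  # add the channel dim, as in C++
            outputs = jit_model(inputs)
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)
    print(correct / total)  # ~0.65 here, unlike the C++ run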
And that's all. Maybe you can see if I'm doing something wrong, or whether it's an error within libtorch.
Thanks in advance.