I have this (to the best of my knowledge) minimal example that segfaults when using a data_loader if I use from_blob to create a tensor for my custom dataset but it works fine if I just use something like torch::empty:
#include <iostream>
#include <torch/torch.h>
#include <vector>
#define USE_FROM_BLOB
struct MyDataset : public torch::data::datasets::Dataset<MyDataset>
{
MyDataset()
{
std::vector<std::array<float, 60>> v1(1000);
#ifdef USE_FROM_BLOB
m_data =torch::from_blob(v1.data(), {(int)v1.size(), (int)v1[0].size()});
#else
m_data = torch::empty({(int)v1.size(), (int)v1[0].size()});
#endif
std::cout << "Here it works: " << m_data[123][0].item<float>() << std::endl;
}
torch::data::Example<> get([[maybe_unused]] size_t index) override
{
#ifdef USE_FROM_BLOB
std::cout << "Here it doesn't: " << std::flush;
#else
std::cout << "Here it works as well: " << std::flush;
#endif
std::cout << m_data[123][0].item<float>() << ", see?" << std::endl;
return {m_data[123], m_data[123]};
}
c10::optional<size_t> size() const override { return m_data.sizes()[0]; }
private:
torch::Tensor m_data;
};
int main()
{
auto data_loader = torch::data::make_data_loader(MyDataset());
for (auto& batch : *data_loader)
{
std::cout << batch.size() << std::endl;
break;
}
return 0;
}
Output when USE_FROM_BLOB is defined:
Here it works: 0
Here it doesn't: Segmentation fault (core dumped)
If it isn’t defined:
Here it works: 0
Here it works as well: 0, see?
1
Why is that and/or how can I fix this? I could just use torch::empty and then fill the tensor manually each element at a time, but it is really slow, compared to the from_blob.