@glaringlee, I am trying to return the paths of the images in each batch during training. To do that, I created a torch::data::PathDataset struct with the two members of the torch::data::Example class, plus an img_path member (a std::string by default). I followed your suggestion and removed override from the get function, but I still got the same error as @lampadephoria, which I fixed by creating a torch::data::datasets::CustomDataset base class.
using Data = std::vector<std::pair<std::string, long>>;
namespace torch {
namespace data {

/// A dataset example consisting of data, an associated target (label), and
/// the path of the source image.
template <typename Output = torch::Tensor,
          typename Target = torch::Tensor,
          typename ImgPath = std::string>
struct PathDataset {
  using DataType = Output;
  using TargetType = Target;
  using ImgPathType = ImgPath;

  PathDataset() = default;
  PathDataset(Output data, Target target, ImgPath img_path)
      : data(std::move(data)),
        target(std::move(target)),
        img_path(std::move(img_path)) {}

  Output data;
  Target target;
  ImgPath img_path;
};

} // namespace data
} // namespace torch
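For reference, constructing one of these mirrors torch::data::Example; a minimal sketch (the tensor values and the path are placeholders):

// All values here are placeholders, just to show the shape of the struct.
torch::data::PathDataset<> example{
    torch::rand({1, 28, 28}),          // data
    torch::tensor({3}, torch::kLong),  // target
    "imgs/img_0003.png"};              // img_path

Then the base dataset class: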
namespace torch {
namespace data {
namespace datasets {

template <typename Self, typename SingleExample = PathDataset<>>
class CustomDataset : public BatchDataset<Self, std::vector<SingleExample>> {
 public:
  using ExampleType = SingleExample;

  /// Returns the example at the given index.
  virtual ExampleType get(size_t index) = 0;

  /// Returns a batch of data.
  /// The default implementation calls `get()` for every requested index
  /// in the batch.
  std::vector<ExampleType> get_batch(ArrayRef<size_t> indices) override {
    std::vector<ExampleType> batch;
    batch.reserve(indices.size());
    for (const auto i : indices) {
      batch.push_back(get(i));
    }
    return batch;
  }
};

} // namespace datasets
} // namespace data
} // namespace torch
class CustomDataset : public torch::data::datasets::CustomDataset<CustomDataset> {
 private:
  Data data;
  Options options;

 public:
  explicit CustomDataset(const Data& data) : data(data) {}

  torch::data::PathDataset<> get(size_t index) {
    std::string path = options.datasetPath + data[index].first;
    auto mat = cv::imread(path);
    assert(!mat.empty());

    cv::resize(mat, mat, cv::Size(options.image_size, options.image_size));
    std::vector<cv::Mat> channels;
    cv::split(mat, channels);

    // Take the first channel only (single-channel images, e.g. MNIST).
    auto pixel_values_tensor = torch::from_blob(
        channels[0].ptr(),
        {options.image_size, options.image_size},
        torch::kUInt8);
    auto tdata = pixel_values_tensor
                     .view({1, options.image_size, options.image_size})
                     .to(torch::kFloat);
    auto tlabel = torch::from_blob(&data[index].second, {1}, torch::kLong);
    return {tdata, tlabel, path};
  }

  torch::optional<size_t> size() const override {
    return data.size();
  }
};
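On its own this works as expected; get() returns the tensors together with the resolved path. A quick sanity check (the file name is made up, and options.datasetPath must point at real images):

Data data = {{"img_0000.png", 7L}};
CustomDataset dataset(data);
auto example = dataset.get(0);
// example.data is a [1, H, W] float tensor, example.target a [1] long tensor,
// and example.img_path the resolved file path.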
This all compiles, but when I try to create a training set:
auto train_set = CustomDataset(data.first).map(torch::data::transforms::Stack<>());
I get the following:
error: static assertion failed: BatchType type of dataset does not match input type of transform
[build] 110 | typename TransformType::InputBatchType>::value,
[build] | ^~~~~
and
error: cannot convert ‘vector<torch::data::PathDataset<>,allocator<torch::data::PathDataset<>>>’ to ‘vector<torch::data::Example<>,allocator<torch::data::Example<>>>’
[build] 75 | return transform_.apply_batch(dataset_.get_batch(std::move(indices)));
[build] | ^
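If I read the errors correctly, Stack<> defaults to Stack<torch::data::Example<>>, so its apply_batch expects std::vector<torch::data::Example<>> rather than the std::vector<torch::data::PathDataset<>> my dataset produces. My untested guess is that I need a custom batch transform along these lines (PathExample and StackWithPaths are names I made up):

// Untested sketch: a Stack-like transform whose input batch type matches
// std::vector<torch::data::PathDataset<>>. It stacks the tensors and
// collects the paths into one vector per batch.
struct PathExample {
  torch::Tensor data;
  torch::Tensor target;
  std::vector<std::string> img_paths;
};

struct StackWithPaths
    : public torch::data::transforms::BatchTransform<
          std::vector<torch::data::PathDataset<>>, PathExample> {
  PathExample apply_batch(std::vector<torch::data::PathDataset<>> examples) override {
    std::vector<torch::Tensor> data, targets;
    std::vector<std::string> paths;
    data.reserve(examples.size());
    targets.reserve(examples.size());
    paths.reserve(examples.size());
    for (auto& example : examples) {
      data.push_back(std::move(example.data));
      targets.push_back(std::move(example.target));
      paths.push_back(std::move(example.img_path));
    }
    return {torch::stack(data), torch::stack(targets), std::move(paths)};
  }
};

// which would replace Stack<> above:
// auto train_set = CustomDataset(data.first).map(StackWithPaths());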
I would appreciate it if you could help me or direct me to an example in the documentation.