Hi All. I have a working piece of C++ code that runs a TorchScript Module. I wrote a helper function that automatically figures out what kind of output the Module.forward() call returned. Here is the full code:
#include <torch/script.h>

#include <iostream>
#include <vector>

/* Collect all outputs of a given PyTorch model.
   @param [in]  pytorchOutputs          - can be a Tuple, GenericDict, TensorList or List
   @param [out] pytorchTensorsToProcess - receives the extracted tensors
   @return 0 on success, 1 on error
*/
static int pytorchCollectModelOutputs(
    c10::IValue &pytorchOutputs,
    std::vector<torch::Tensor> &pytorchTensorsToProcess
) {
    const char *err = "Error in pytorchCollectModelOutputs(): Expecting parameter 'pred' as type: Tuple, GenericDict, TensorList or List\n";

    if (pytorchOutputs.isTuple()) {
        const auto &predArray = pytorchOutputs.toTuple()->elements();
        for (size_t i = 0; i < predArray.size(); ++i) {
            pytorchTensorsToProcess.push_back(predArray[i].toTensor());
        }
    }
    else if (pytorchOutputs.isGenericDict()) {
        auto genDict = pytorchOutputs.toGenericDict();
        for (auto &item : genDict) {
            // both item.key() and item.value() are accessible here
            pytorchTensorsToProcess.push_back(item.value().toTensor());
        }
    }
    else if (pytorchOutputs.isTensorList()) {
        // Check the typed lists before the generic isList(), because isList()
        // also matches typed c10::List values and would intercept them first.
        c10::List<at::Tensor> v = pytorchOutputs.toTensorList();
        for (size_t i = 0; i < v.size(); ++i) {
            pytorchTensorsToProcess.push_back(v[i]);
        }
    }
    else if (pytorchOutputs.isDoubleList()) {
        std::cerr << err << " Got pred=" << pytorchOutputs << std::endl;
        return 1; // error
    }
    else if (pytorchOutputs.isList()) {
        // Generic list: assume every element is a Tensor
        auto predArray = pytorchOutputs.toList();
        for (size_t i = 0; i < predArray.size(); ++i) {
            pytorchTensorsToProcess.push_back(predArray.get(i).toTensor());
        }
    }
    else if (pytorchOutputs.isDouble()) {
        std::cerr << err << " Got pred=" << pytorchOutputs << std::endl;
        return 1; // error
    }
    else if (pytorchOutputs.isScalar()) {
        std::cerr << err << " Got pred=" << pytorchOutputs << std::endl;
        return 1; // error
    }
    else if (pytorchOutputs.isTensor()) {
        pytorchTensorsToProcess.push_back(pytorchOutputs.toTensor());
    }
    else if (pytorchOutputs.isFuture()) {
        std::cerr << err << " Got pred=" << pytorchOutputs << std::endl;
        return 1; // error
    }
    else {
        std::cerr << err << " Got pred=" << pytorchOutputs << std::endl;
        return 1; // error
    }
    return 0;
}
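For reference, this is roughly how I call it after inference (a minimal sketch; the model path and input shape are just placeholders):

int main() {
    // Load a TorchScript module and move it to the GPU (path is a placeholder).
    torch::jit::script::Module module = torch::jit::load("model.pt");
    module.to(torch::kCUDA);
    module.eval();

    // Build a dummy CUDA input (shape is a placeholder).
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(torch::randn({1, 3, 224, 224}, torch::kCUDA));

    // forward() allocates the output tensors itself.
    c10::IValue pred = module.forward(inputs);

    std::vector<torch::Tensor> outputs;
    if (pytorchCollectModelOutputs(pred, outputs) != 0) {
        return 1;
    }
    // 'outputs' now holds every tensor the model returned.
    return 0;
}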
Now, my question is: how can I preallocate all the CUDA output buffers before the forward() call and ask LibTorch (C++) to fill these buffers in during the Module.forward() call?
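To make the intent concrete, this is roughly what I can do today (buffer names and shapes below are hypothetical): allocate the output tensors myself with torch::empty on the GPU and copy into them after forward() returns. I would like forward() to write into these buffers directly instead, so the extra copy goes away:

// What I do today and would like to avoid: preallocate my own CUDA buffers
// and copy the tensors that forward() allocated into them after the call.
// 'preallocatedOutputs' and the shapes are hypothetical placeholders.
std::vector<torch::Tensor> preallocatedOutputs;
preallocatedOutputs.push_back(torch::empty({1, 1000}, torch::kCUDA)); // one per expected output

c10::IValue pred = module.forward(inputs);
std::vector<torch::Tensor> collected;
pytorchCollectModelOutputs(pred, collected);

for (size_t i = 0; i < collected.size(); ++i) {
    // extra device-to-device copy I want to eliminate
    preallocatedOutputs[i].copy_(collected[i]);
}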