The issue I am trying to solve is to improve the running time of the object extraction. I would like to know if there is a more efficient way to write the portion of code below, since I observed that it takes a lot of time. The variable yolo_output is the output of the yolov5 neural net.
int confidenceIndex=4;
int labelStart=5;
torch::Tensor conf = yolo_output.squeeze(0).
index({Slice(None),Slice(confidenceIndex,confidenceIndex+1)})
.squeeze(1);
torch::Tensor keep = torch::gt(conf,0.3);
yolo_output = yolo_output.index({Slice(None),keep,Slice(None)});
torch::Tensor scores = yolo_output.squeeze(0).
index({Slice(None),Slice(labelStart,None)});
torch::Tensor score_pos = scores.argmax(1);
scores = std::get<0>(torch::max(scores,1,false));
torch::Tensor final_keep = scores * yolo_output.squeeze(0).
index({Slice(None),Slice(confidenceIndex,confidenceIndex+1)})
.squeeze(1);
scores=final_keep;
if(final_keep.numel() == 0)
{
std::vector<yolo_element> yolo_elements;
return yolo_elements;
}
final_keep = torch::gt(final_keep,0.3);
yolo_output = yolo_output.index({Slice(None),final_keep,Slice(None)});
scores = scores.index({final_keep});
if(scores.numel() == 0)
{
std::vector<yolo_element> yolo_elements;
return yolo_elements;
}
score_pos = score_pos.index({final_keep});
yolo_output=yolo_output.to(torch::kCPU);
float *out = yolo_output.data_ptr<float>();
int v1,v2;
v1 = yolo_output.size(1);
v2 = yolo_output.size(2);
std::vector<yolo_element> yolo_elements(v1);
for(int i = 0;i < v1;i++)
{
int index=i*v2;
int tx = (out[index]-out[index+2]/2);
int ty = (out[index+1]-out[index+3]/2)* 0.9375;
int bx = (out[index]+out[index+2]/2);
int by = (out[index+1]+out[index+3]/2) * 0.9375;
yolo_element el = {Point(tx,ty),Point(bx,by),scores[i].item<float>(),score_pos[i].item<float>()};
yolo_elements[i] = el;
}
return yolo_elements;