Alright, the code is quite big, so I am not going to paste everything.

The C++ code has two types of functions. The first one:

```
// Forwards the raw buffers of `xyz` (accessed as float) and `idx_out`
// (accessed as int) to cubeSelectLauncher together with b, n and radius.
//
// The contiguous tensors are held in named locals on purpose: when a tensor is
// not already contiguous, contiguous() returns a *new* tensor, and a pointer
// taken from that unnamed temporary would dangle as soon as the statement
// ends. For the output tensor the results would additionally land in the
// discarded copy, so they are copied back into `idx_out` afterwards.
void select_cube(at::Tensor xyz, at::Tensor idx_out, int b, int n,float radius)
{
    // Keep the (possibly freshly allocated) contiguous tensors alive for the
    // whole call so the raw pointers below stay valid.
    at::Tensor xyz_c = xyz.contiguous();
    at::Tensor idx_c = idx_out.contiguous();

    float* input = xyz_c.data<float>();
    int* output = idx_c.data<int>();
    cubeSelectLauncher(b, n, radius, input, output);

    // If idx_out was not contiguous, idx_c is a separate buffer: propagate the
    // results to the tensor the caller actually passed in.
    // NOTE(review): assumes the launcher's writes are ordered before this copy
    // (e.g. same stream) - confirm against cubeSelectLauncher.
    if (!idx_out.is_contiguous()) {
        idx_out.copy_(idx_c);
    }
}
```

It hands the tensors' data to the CUDA code above to manipulate.

The other one is a bit longer and manipulates a Python tensor directly in C++, as follows:

```
void interpolate(int b, int n, int m, at::Tensor xyz1p, at::Tensor xyz2p, at::Tensor distp, at::Tensor idxp){
float * xyz1 = xyz1p.contiguous().data<float>();
float * xyz2 = xyz2p.contiguous().data<float>();
float * dist = distp.contiguous().data<float>();
int * idx = idxp.contiguous().data<int>();
for (int i=0;i<b;++i) {
for (int j=0;j<n;++j) {
float x1=xyz1[j*3+0];
float y1=xyz1[j*3+1];
float z1=xyz1[j*3+2];
double best1=1e40; double best2=1e40; double best3=1e40;
int besti1=0; int besti2=0; int besti3=0;
for (int k=0;k<m;++k) {
float x2=xyz2[k*3+0];
float y2=xyz2[k*3+1];
float z2=xyz2[k*3+2];
double d=(x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1);
if (d<best1) {
best3=best2;
besti3=besti2;
best2=best1;
besti2=besti1;
best1=d;
besti1=k;
} else if (d<best2) {
best3=best2;
besti3=besti2;
best2=d;
besti2=k;
} else if (d<best3) {
best3=d;
besti3=k;
}
}
dist[j*3]=best1;
idx[j*3]=besti1;
dist[j*3+1]=best2;
idx[j*3+1]=besti2;
dist[j*3+2]=best3;
idx[j*3+2]=besti3;
}
xyz1+=n*3;
xyz2+=m*3;
dist+=n*3;
idx+=n*3;
}
}
```

If I am correctly converting the Tensor to float* using data(), then the error might be coming from the second type of function. But again, the code is the same as it was in C; I only changed the conversion from tensor to pointer.