Hi. I’m trying to write a c++ extension for PyTorch and some problem occur. Below is a minimal example.
std::vector<at::Tensor> weight_backward(const at::Tensor &dw,const at::Tensor &query)
{
auto dq = at::zeros_like(query);
auto dk = at::zeros_like(query);
//just to test the value
std::cout<<dw<<std::endl;
double *dtmp=dw.data<double>();
for(int i=0;i<1*3*4*4;i++)
{
std::cout<<dtmp[i]<<'\t';
if((i+1)%4==0)
std::cout<<std::endl;
}
// backward function
return {dq,dk};
}
The tensor dw is filled with one in python.but its output is like below.
//print in python
//print(dw)
tensor([[[[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.]],
[[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.]],
[[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.]]]])
//output of that backward function above
//std::cout<<dw<<std::endl;
(1,1,.,.) =
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
(1,2,.,.) =
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
(1,3,.,.) =
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
[ Variable{1,3,4,4} ]
// double *dtmp=dw.data<double>();
// for(int i=0;i<1*3*4*4;i++)
// {
// std::cout<<dtmp[i]<<'\t';
// if((i+1)%4==0)
// std::cout<<std::endl;
// }
1 3.07949e-41 5.60519e-45 0
4.2039e-45 0 1.32423e-42 0
5.82094e-13 4.57118e-41 5.8236e-13 4.57118e-41
-nan -nan 2.8026e-45 0
1.32404e+21 3.07949e-41 0 0
6.09951e-13 4.57118e-41 4.48416e-44 0
0 0 1.00571e+21 3.07949e-41
0 0 0 0
0 0 1.34718e+22 3.07949e-41
1.02515e+21 3.07949e-41 1.34718e+22 3.07949e-41
1.34721e+22 3.07949e-41 1.34721e+22 3.07949e-41
9.51603e+20 3.07949e-41 9.90819e+20 3.07949e-41
I don’t know why it outputs such random value instead of one.
And also I tried to change it like below.
std::vector<at::Tensor> weight_backward(const at::Tensor &dw,const at::Tensor &query)
{
//auto dq = at::zeros_like(query);
//auto dk = at::zeros_like(query);
//just to test the value
dw1=at::zeros_like(dw);
std::cout<<dw1<<std::endl;
double *dtmp=dw1.data<double>();
for(int i=0;i<1*3*4*4;i++)
{
std::cout<<dtmp[i]<<'\t';
if((i+1)%4==0)
std::cout<<std::endl;
}
// backward function
return {dq,dk};
}
That performs ok though it means nothing.
Could anybody tell me what exactly happens when it converts from CPUFloatTensor to double array? Thanks a lot.