Hello, I’m learning how to use torch::nn::functional::cross_entropy to compute a loss, and I’ve encountered a strange loss result.
Code snippet:
/* Get a single sample from a batch */
auto data_tensor = test_loader->begin()->data[0].to(device);
auto target_tensor = test_loader->begin()->target[0].to(device);
data_tensor = data_tensor.unsqueeze(0);     // [3, 300, 300] -> [1, 3, 300, 300]
target_tensor = target_tensor.unsqueeze(0); // unsqueezed twice:
target_tensor = target_tensor.unsqueeze(0); // [300, 300] -> [1, 1, 300, 300]

/* Model */
auto in_channel = 3;
auto out_channel = 1;
auto double_conv = DoubleConv(in_channel, out_channel);

/* Optimizer */
torch::optim::Adam optimizer(double_conv->parameters(), torch::optim::AdamOptions(learning_rate));

/* Forward */
data_tensor.requires_grad_(true);
double_conv->to(device);
std::cout << "data_tensor[size]: " << data_tensor.sizes() << std::endl;
std::cout << "target_tensor[size]: " << target_tensor.sizes() << std::endl;
auto output_conv_double = double_conv->forward(data_tensor);
std::cout << "output_conv_double[size]: " << output_conv_double.sizes() << std::endl;
auto loss = torch::nn::functional::cross_entropy(output_conv_double, target_tensor);
std::cout << "loss: " << loss << std::endl;
std::cout << "loss[size]: " << loss.sizes() << "\n" << std::endl;

/* Backward and step */
optimizer.zero_grad();
loss.backward();
std::cout << "double_conv[params]: " << double_conv->parameters() << std::endl;
std::cout << "\n========STEP========\n" << std::endl;
optimizer.step();
std::cout << "double_conv[params]: " << double_conv->parameters() << std::endl;
Terminal output:
data_tensor[size]: [1, 3, 300, 300]
target_tensor[size]: [1, 1, 300, 300]
output_conv_double[size]: [1, 1, 300, 300]
loss: -0
[ CUDAFloatType{} ]
loss[size]: []
double_conv[params]: (1,1,.,.) =
0.0234 0.0282 -0.0839
-0.1429 0.1813 -0.0405
-0.1422 0.0471 0.0029
(1,2,.,.) =
0.0984 -0.1271 0.1176
0.1223 0.1840 -0.1340
0.1659 -0.0649 -0.0265
(1,3,.,.) =
-0.1895 0.1487 0.1844
-0.1412 0.1739 0.1029
-0.1922 -0.1089 -0.1594
[ CUDAFloatType{1,3,3,3} ] 1
[ CUDAFloatType{1} ] 0
[ CUDAFloatType{1} ] (1,1,.,.) =
0.2240 0.1544 -0.2749
0.2366 -0.0623 0.0697
0.0303 0.1510 -0.1503
[ CUDAFloatType{1,1,3,3} ] 1
[ CUDAFloatType{1} ] 0
[ CUDAFloatType{1} ]
========STEP========
double_conv[params]: (1,1,.,.) =
0.0234 0.0282 -0.0839
-0.1429 0.1813 -0.0405
-0.1422 0.0471 0.0029
(1,2,.,.) =
0.0984 -0.1271 0.1176
0.1223 0.1840 -0.1340
0.1659 -0.0649 -0.0265
(1,3,.,.) =
-0.1895 0.1487 0.1844
-0.1412 0.1739 0.1029
-0.1922 -0.1089 -0.1594
[ CUDAFloatType{1,3,3,3} ] 1
[ CUDAFloatType{1} ] 0
[ CUDAFloatType{1} ] (1,1,.,.) =
0.2240 0.1544 -0.2749
0.2366 -0.0623 0.0697
0.0303 0.1510 -0.1503
[ CUDAFloatType{1,1,3,3} ] 1
[ CUDAFloatType{1} ] 0
[ CUDAFloatType{1} ]
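Both parameter dumps above are identical, so I suspect loss.backward() produced all-zero gradients and optimizer.step() had nothing to apply. A quick check I could add right after the backward pass (just a sketch, not part of the run above):

/* Sketch: print gradient magnitudes after loss.backward().
   If the loss is identically zero, every gradient should be zero too. */
for (const auto& p : double_conv->parameters()) {
    if (p.grad().defined()) {
        std::cout << "grad abs sum: " << p.grad().abs().sum() << std::endl;
    }
}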
The cross-entropy loss between output_conv_double and target_tensor is computed with:

auto loss = torch::nn::functional::cross_entropy(output_conv_double, target_tensor);

The loss value comes out as -0:

loss: -0
[ CUDAFloatType{} ]

and the loss tensor has an empty size (a zero-dimensional scalar):

loss[size]: []
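I wonder whether the single output channel is relevant: softmax over a channel dimension of size 1 is always 1, and log(1) == 0, which would explain a loss of exactly -0 and the unchanged parameters. Here is a minimal standalone sketch (hypothetical small shapes on CPU; it assumes a LibTorch build that accepts a float target of the same shape as the input, which mine apparently does since no error is raised) that seems to reproduce the behavior:

#include <torch/torch.h>
#include <iostream>

int main() {
    // One class channel plus a float target of the same shape,
    // mirroring my [1, 1, 300, 300] tensors at a smaller size.
    auto logits = torch::randn({1, 1, 4, 4});
    auto target = torch::rand({1, 1, 4, 4});
    // log_softmax over the single channel is identically 0, so the
    // loss comes out as -0 regardless of the actual values.
    auto loss = torch::nn::functional::cross_entropy(logits, target);
    std::cout << "loss: " << loss << std::endl;  // prints -0
    return 0;
}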
Any idea which part I’ve missed?
cc: @ptrblck