Where `.to(torch::kUInt8)` is applied changes the resulting masks

I’m translating some Python code into C++.
My original code looks like this:

  pred_masks = pred_masks.ge(0.5).to(torch::kUInt8);
...
  for (auto i = 0; i < pred_bboxes.sizes()[0]; i++) {
        auto res = func(pred_bboxes[i], pred_masks[i]);
  }
...
Segmentation func(
    const torch::Tensor &pred_bbox, const torch::Tensor &pred_mask) const {
...
  torch::Tensor const img =
      pred_mask.index({torch::indexing::Slice(
                           static_cast<int>(pred_bbox[1].item<double>()),
                           static_cast<int>(pred_bbox[3].item<double>()) + 1),
                       torch::indexing::Slice(
                           static_cast<int>(pred_bbox[0].item<double>()),
                           static_cast<int>(pred_bbox[2].item<double>()) + 1)});
...

But the resulting masks were very wrong, so I changed my code to the following:

  // pred_masks = pred_masks.ge(0.5).to(torch::kUInt8);
  pred_masks = pred_masks >= 0.5; // dtype: bool
  pred_masks = pred_masks.to(torch::kBool);
  std::vector<torch::Tensor> mask_vec;
  for (int i = 0; i < pred_masks.sizes()[0]; ++i) {
    mask_vec.push_back(pred_masks[i]);
  }
  pred_masks = torch::cat({mask_vec}, 0).squeeze(1);
  ...
  for (auto i = 0; i < pred_bboxes.sizes()[0]; i++) {
        auto res = func(pred_bboxes[i], pred_masks[i]);
  }
  ...

Segmentation func(
    const torch::Tensor &pred_bbox, const torch::Tensor &pred_mask) const {
...
  torch::Tensor const img =
      pred_mask.index({torch::indexing::Slice(
                           static_cast<int>(pred_bbox[1].item<double>()),
                           static_cast<int>(pred_bbox[3].item<double>()) + 1),
                       torch::indexing::Slice(
                           static_cast<int>(pred_bbox[0].item<double>()),
                           static_cast<int>(pred_bbox[2].item<double>()) + 1)})
                      .to(torch::kUInt8);
  cv::Mat const mat(static_cast<int>(img.sizes()[0]),
                    static_cast<int>(img.sizes()[1]), CV_8UC1,
                    img.data_ptr<uint8_t>());
  mask.mask = std::move(
      *cv_bridge::CvImage(std_msgs::msg::Header(), "mono8", mat).toImageMsg());
...

Now the masks are correct. I don't understand why the previous version could not produce correct masks.
The only difference is where the `.to(torch::kUInt8)` conversion happens. (I found that choosing between `.ge(0.5)` and `>= 0.5` makes no difference, as long as `.to(torch::kUInt8)` is applied inside the function.)
Also, I expected `pred_mask` to be a `torch::Tensor`, but inside the function it is actually an `at::Tensor`, which seems odd.