When I try to move tensors originating from a CUDA device to the CPU, as in the last line of the snippet below, I get the following error. I know the code snippet is not reproducible, but it was hard to provide a reproducible piece of code, so please excuse me.
terminate called after throwing an instance of 'c10::Error'
what(): CUDA error: an illegal memory access was encountered
Exception raised from copy_kernel_cuda at /pytorch/aten/src/ATen/native/cuda/Copy.cu:200 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x69 (0x7fde33e23b29 in /home/orcun/.libs/libtorch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0xd2 (0x7fde33e20ab2 in /home/orcun/.libs/libtorch/lib/libc10.so)
frame #2: <unknown function> + 0x1c03e6f (0x7fde4854ce6f in /home/orcun/.libs/libtorch/lib/libtorch_cuda_cu.so)
frame #3: <unknown function> + 0xf72a93 (0x7fde34fd0a93 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0xf70e43 (0x7fde34fcee43 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #5: at::native::copy_(at::Tensor&, at::Tensor const&, bool) + 0x53 (0x7fde34fcfcd3 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #6: at::Tensor::copy_(at::Tensor const&, bool) const + 0x12d (0x7fde35a925cd in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0x39dfef7 (0x7fde37a3def7 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #8: at::Tensor::copy_(at::Tensor const&, bool) const + 0x12d (0x7fde35a925cd in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #9: at::native::to(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) + 0xcc9 (0x7fde3525aaf9 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0x1898009 (0x7fde358f6009 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x18980ab (0x7fde358f60ab in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0x1a82004 (0x7fde35ae0004 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #13: at::Tensor::to(c10::TensorOptions, bool, bool, c10::optional<c10::MemoryFormat>) const + 0x1d0 (0x7fde35ab6820 in /home/orcun/.libs/libtorch/lib/libtorch_cpu.so)
frame #14: centerpoint::CenterPointTRT::postProcessSingleHead(at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor) + 0xe6c (0x7fdea99067e4 in /home/orcun/Main.Drive/devel/lib/liblib_centerpointv2.so)
frame #15: centerpoint::CenterPointTRT::detect(sensor_msgs::PointCloud2_<std::allocator<void> > const&, tf2_ros::Buffer const&) + 0x111a (0x7fdea990432a in /home/orcun/Main.Drive/devel/lib/liblib_centerpointv2.so)
frame #16: <unknown function> + 0x1df92 (0x55759074af92 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #17: <unknown function> + 0x36234 (0x557590763234 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #18: <unknown function> + 0x34f0f (0x557590761f0f in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #19: <unknown function> + 0x33f69 (0x557590760f69 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #20: <unknown function> + 0x32631 (0x55759075f631 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #21: <unknown function> + 0x3c86f (0x55759076986f in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #22: <unknown function> + 0x3bf05 (0x557590768f05 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #23: ros::SubscriptionQueue::call() + 0x832 (0x7fdea92eee92 in /opt/ros/melodic/lib/libroscpp.so)
frame #24: ros::CallbackQueue::callOneCB(ros::CallbackQueue::TLS*) + 0x589 (0x7fdea9299559 in /opt/ros/melodic/lib/libroscpp.so)
frame #25: ros::CallbackQueue::callAvailable(ros::WallDuration) + 0x36b (0x7fdea929b2fb in /opt/ros/melodic/lib/libroscpp.so)
frame #26: ros::SingleThreadedSpinner::spin(ros::CallbackQueue*) + 0x309 (0x7fdea92f2a39 in /opt/ros/melodic/lib/libroscpp.so)
frame #27: ros::spin() + 0x2b (0x7fdea92db37b in /opt/ros/melodic/lib/libroscpp.so)
frame #28: <unknown function> + 0x3e363 (0x55759076b363 in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
frame #29: __libc_start_main + 0xe7 (0x7fddba2a5c87 in /lib/x86_64-linux-gnu/libc.so.6)
frame #30: <unknown function> + 0x1b46a (0x55759074846a in /home/orcun/Main.Drive/devel/lib/lidar_centerpointv2/lidar_centerpoint_nodev2)
// Decodes the five per-task output tensors (heatmap, offsets, z, dim, rot) into a
// single tensor of candidate boxes and copies it to the CPU.
// NOTE(review): this snippet shows neither the function braces nor a return
// statement, and the shapes/devices of the inputs are not visible here.
at::Tensor CenterPointTRT::postProcessSingleHead(at::Tensor task_heatmap_t_, at::Tensor task_offsets_t_, at::Tensor task_z_t_,
at::Tensor task_dim_t_, at::Tensor task_rot_t_)
// select_topk (defined elsewhere) yields a 5-element tuple, unpacked below as
// scores, index, classes, ys and xs. Presumably these describe at most
// Config::max_num_output_objects heatmap peaks -- TODO confirm against select_topk.
auto topk_tuple = select_topk(task_heatmap_t_, Config::max_num_output_objects);
at::Tensor scores = std::get<0>(topk_tuple);
at::Tensor index = std::get<1>(topk_tuple);
at::Tensor classes = std::get<2>(topk_tuple);
at::Tensor ys = std::get<3>(topk_tuple);
at::Tensor xs = std::get<4>(topk_tuple);
// Pick the entries addressed by `index` out of each remaining task tensor.
// NOTE(review): select_point_of_interest is not shown. If it gathers on the GPU
// and `index` holds an out-of-range value, the faulting kernel would be reported
// asynchronously, i.e. only at a later synchronizing call such as the
// .to(torch::kCPU) below -- verify the index range and tensor shapes there.
at::Tensor offset_poi = select_point_of_interest(index, task_offsets_t_);
at::Tensor z_poi = select_point_of_interest(index, task_z_t_);
at::Tensor dim_poi = select_point_of_interest(index, task_dim_t_);
at::Tensor rot_poi = select_point_of_interest(index, task_rot_t_);
// x = voxel_size_x * downsample_factor * (xs + offset[..., 0:1]) + pointcloud_range_xmin.
// xs is reshaped to {1, -1, 1} so it can be added to the first slice of dim 2 of offset_poi.
at::Tensor x = Config::voxel_size_x * Config::downsample_factor *
(xs.view({1, -1, 1}) + offset_poi.slice(2, 0, 1)) +
Config::pointcloud_range_xmin;
// y is computed the same way from ys, the second slice of dim 2 of offset_poi,
// and the y-axis configuration constants.
at::Tensor y = Config::voxel_size_y * Config::downsample_factor *
(ys.view({1, -1, 1}) + offset_poi.slice(2, 1, 2)) +
Config::pointcloud_range_ymin;
// Element-wise exponential of the selected dim values
// (presumably they are predicted in log space -- TODO confirm).
dim_poi = torch::exp(dim_poi);
// Angle from the two rot channels: atan2(channel 0, channel 1), then mapped to
// -angle - pi/2 (`pi` is defined outside this snippet).
at::Tensor rot = torch::atan2(rot_poi.slice(2, 0, 1), rot_poi.slice(2, 1, 2));
rot = -rot - pi / 2;
// Concatenate along dim 2 in the order: score, class, x, y, z, dim, rot; then make
// the result contiguous, cast to float32 and copy it to the CPU.
// The reported stack trace (at::Tensor::to -> copy_ -> copy_kernel_cuda) raises
// its exception from this final .to(torch::kCPU) call.
at::Tensor boxes3d = torch::cat({scores.view({1, -1, 1}), classes.view({1, -1, 1}), x, y, z_poi, dim_poi, rot}, /*dim=*/2)
.contiguous().to(torch::kFloat32).to(torch::kCPU);