Hanging at tensor.to(torch.float32)

It hangs while loading the pretrained model; the Python call stack at the time of the hang is below.

torch==1.7.1

Any idea on how to fix it? Thanks

Traceback (most recent call first):
  File "/opt/conda/lib/python3.7/site-packages/apex/amp/_initialize.py", line 141, in __call__
    param = param.to(torch.float32)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 909, in state_dict
    hook_result = hook(self, destination, prefix, local_metadata)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 907, in state_dict
    module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
  (frame information optimized out)
  (frame information optimized out)
  File "/tmp/code/quickdetection/src/qd/opt/checkpoint.py", line 267, in load
    self._load_model(checkpoint)
  File "/tmp/code/quickdetection/src/qd/opt/checkpoint.py", line 249, in recover_or_load
    return self.load(f, model_only, load_if_has)
  (frame information optimized out)
  (frame information optimized out)
  File "/tmp/code/quickdetection/src/qd/pipeline.py", line 674, in pipeline_train_eval_multi
    pip.ensure_train()
  File "src/qd/qd_common.py", line 3351, in execute_func
  File "src/qd/qd_common.py", line 3827, in <module>

Native (C/CUDA) call stack captured with gdb:

(gdb) where
#0  0x00007f8803891ef7 in sched_yield () at ../sysdeps/unix/syscall-template.S:78
#1  0x00007f86fa6e62de in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#2  0x00007f86fa6e6bb3 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3  0x00007f86fa990fc5 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4  0x00007f86fa99117c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5  0x00007f86fa991227 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6  0x00007f86fa7c2b49 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7  0x00007f86fa966d70 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8  0x00007f86fa6c5bd7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#9  0x00007f86fa6c6040 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#10 0x00007f86fa6c612e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#11 0x00007f86fa891542 in cuLaunchKernel () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#12 0x00007f87bc7b162b in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/../../../../libcudart.so.11.0
#13 0x00007f87bc7f1636 in cudaLaunchKernel () from /opt/conda/lib/python3.7/site-packages/torch/lib/../../../../libcudart.so.11.0
#14 0x00007f875868f36c in __device_stub__ZN2at6native27unrolled_elementwise_kernelIZZZNS0_21copy_device_to_deviceERNS_14TensorIteratorEbENKUlvE0_clEvENKUlvE2_clEvEUnvhdl0_1_0_PFvS3_bENS0_21copy_dev$
ce_to_deviceE7_ffENS_6detail5ArrayIPcLi2EEE23TrivialOffsetCalculatorILi1EjESE_NS0_6memory12LoadWithCastILi1EEENSF_13StoreWithCastEEEviT_T0_T1_T2_T3_T4_(int, __nv_hdl_wrapper_t<false, true, __nv_dl_$
ag<void (*)(at::TensorIterator&, bool), &at::native::copy_device_to_device, 7u>, float (float)>&, at::detail::Array<char*, 2>&, TrivialOffsetCalculator<1, unsigned int>&, TrivialOffsetCalculator<1,
unsigned int>&, at::native::memory::LoadWithCast<1>&, at::native::memory::StoreWithCast&) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so
#15 0x00007f87586a1d55 in void at::native::gpu_kernel_impl<__nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIterator&, bool), &at::native::copy_device_to_device, 7u>, float (float)> >$
at::TensorIterator&, __nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIterator&, bool), &at::native::copy_device_to_device, 7u>, float (float)> const&) ()
   from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so
#16 0x00007f87586a294b in void at::native::gpu_kernel<__nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIterator&, bool), &at::native::copy_device_to_device, 7u>, float (float)> >(at::$
ensorIterator&, __nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIterator&, bool), &at::native::copy_device_to_device, 7u>, float (float)> const&) ()
   from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so
#17 0x00007f875869337f in at::native::copy_device_to_device(at::TensorIterator&, bool) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so
#18 0x00007f87586956b7 in at::native::copy_kernel_cuda(at::TensorIterator&, bool) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so
#19 0x00007f87a9ba9f19 in void at::native::DispatchStub<void (*)(at::TensorIterator&, bool), at::native::copy_stub>::operator()<at::TensorIterator&, bool&>(c10::DeviceType, at::TensorIterator&, boo$
&) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#20 0x00007f87a9ba8b4f in at::native::copy_impl(at::Tensor&, at::Tensor const&, bool) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#21 0x00007f87a9ba9de4 in at::native::copy_(at::Tensor&, at::Tensor const&, bool) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#22 0x00007f87aa2ec4dc in at::Tensor& c10::Dispatcher::callWithDispatchKey<at::Tensor&, at::Tensor&, at::Tensor const&, bool>(c10::TypedOperatorHandle<at::Tensor& (at::Tensor&, at::Tensor const&, b$
ol)> const&, c10::DispatchKey, at::Tensor&, at::Tensor const&, bool) const () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#23 0x00007f87aa3f4218 in at::Tensor::copy_(at::Tensor const&, bool) const () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#24 0x00007f87abfebd39 in torch::autograd::VariableType::(anonymous namespace)::copy_(at::Tensor&, at::Tensor const&, bool) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#25 0x00007f87aa2ec4dc in at::Tensor& c10::Dispatcher::callWithDispatchKey<at::Tensor&, at::Tensor&, at::Tensor const&, bool>(c10::TypedOperatorHandle<at::Tensor& (at::Tensor&, at::Tensor const&, b$
ol)> const&, c10::DispatchKey, at::Tensor&, at::Tensor const&, bool) const () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#26 0x00007f87aa3f4218 in at::Tensor::copy_(at::Tensor const&, bool) const () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#27 0x00007f87a9e0ffc1 in at::native::to(at::Tensor const&, c10::ScalarType, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so
#28 0x00007f87aa33d03a in at::TypeDefault::to_dtype(at::Tensor const&, c10::ScalarType, bool, bool, c10::optional<c10::MemoryFormat>) ()
   from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so

Note: `tensor.to(torch.float32)` is valid — `Tensor.to()` accepts a dtype, a device, or both (e.g. `tensor.to('cuda:0')`, `tensor.to(torch.float32)`), and `tensor.float()` is just shorthand for `tensor.to(torch.float32)`, so switching calls will not by itself fix the hang.
The native backtrace shows the process spinning in `sched_yield()` inside `libcuda.so` during `cuLaunchKernel` for a device-to-device copy, which points to the CUDA driver/runtime stalling (e.g. driver–toolkit mismatch, a wedged GPU, or a collective/synchronization deadlock in multi-process training) rather than a misuse of `.to()`.