Alright, I ran it with GDB and this is what I saw:
#0 0x00007ffedccbab44 in clock_gettime ()
#1 0x00007fab70bdbea6 in __GI___clock_gettime (clock_id=4, tp=0x7ffedcc243a0) at ../sysdeps/unix/clock_gettime.c:115
#2 0x00007fa925c2470e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007fa925cfd837 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007fa925bc5b6c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5 0x00007fa925c01660 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6 0x00007fa925b38e98 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7 0x00007fa925b3942c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8 0x00007fab6a128a27 in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#9 0x00007fab6a1202a0 in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#10 0x00007fab6a12d6a7 in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#11 0x00007fab6a12f2c1 in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#12 0x00007fab6a12243e in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#13 0x00007fab6a111de8 in ?? () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#14 0x00007fab6a14323c in cudaMalloc () from /opt/conda/lib/python3.7/site-packages/torch/lib/libcudart-1581fefa.so.10.0
#15 0x00007fab6c9af477 in c10::cuda::CUDACachingAllocator::THCCachingAllocator::malloc(void**, unsigned long, CUstream_st*) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so
#16 0x00007fab6c9b0d5e in c10::cuda::CUDACachingAllocator::CudaCachingAllocator::allocate(unsigned long) const () from /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so
#17 0x00007fab01cad094 in at::native::empty_cuda(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#18 0x00007fab0057d8d8 in at::CUDAType::(anonymous namespace)::empty(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#19 0x00007faafdf0fc47 in c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>), at::Tensor, c10::guts::typelist::typelist<c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> > >, at::Tensor (c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>)>::call(c10::OperatorKernel*, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#20 0x00007faaffecf8a5 in torch::autograd::VariableType::(anonymous namespace)::empty(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#21 0x00007faafdf0fc47 in c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>), at::Tensor, c10::guts::typelist::typelist<c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat> > >, at::Tensor (c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>)>::call(c10::OperatorKernel*, c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#22 0x00007faafdc78456 in at::native::to_impl(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#23 0x00007faafdc79805 in at::native::to(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#24 0x00007faafdfbdcaa in at::TypeDefault::to(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#25 0x00007faaffca3976 in torch::autograd::VariableType::(anonymous namespace)::to(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#26 0x00007faafe0086f2 in c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat> > >, at::Tensor (at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>)>::call(c10::OperatorKernel*, at::Tensor const&, c10::TensorOptions const&, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so
#27 0x00007fab4448bd80 in torch::autograd::dispatch_to(at::Tensor const&, c10::Device, bool, bool, c10::optional<c10::MemoryFormat>) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#28 0x00007fab445b7920 in torch::autograd::THPVariable_cuda(_object*, _object*, _object*) () from /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so
#29 0x0000561809be9c94 in _PyMethodDef_RawFastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:694
#30 0x0000561809bf0aef in _PyMethodDescr_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/descrobject.c:288
#31 0x0000561809c5537c in call_function (kwnames=0x0, oparg=2, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4593
#32 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3110
#33 0x0000561809b9959a in _PyEval_EvalCodeWithName () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3930
#34 0x0000561809be9497 in _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:433
#35 0x0000561809c50be6 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#36 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3124
#37 0x0000561809be920b in function_code_fastcall (globals=<optimized out>, nargs=2, args=<optimized out>, co=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:283
#38 _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:408
#39 0x0000561809c55229 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#40 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3093
#41 0x0000561809be920b in function_code_fastcall (globals=<optimized out>, nargs=2, args=<optimized out>, co=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:283
#42 _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:408
#43 0x0000561809c55229 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#44 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3093
#45 0x0000561809b99b00 in _PyEval_EvalCodeWithName () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3930
#46 0x0000561809be9497 in _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:433
#47 0x0000561809c55229 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#48 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3093
#49 0x0000561809be920b in function_code_fastcall (globals=<optimized out>, nargs=1, args=<optimized out>, co=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:283
#50 _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:408
#51 0x0000561809c55229 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#52 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3093
#53 0x0000561809be920b in function_code_fastcall (globals=<optimized out>, nargs=2, args=<optimized out>, co=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:283
#54 _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:408
#55 0x0000561809c50be6 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#56 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3124
#57 0x0000561809b992b9 in _PyEval_EvalCodeWithName () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3930
#58 0x0000561809b9a1d4 in PyEval_EvalCodeEx () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3959
#59 0x0000561809b9a1fc in PyEval_EvalCode (co=<optimized out>, globals=<optimized out>, locals=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:524
#60 0x0000561809c602ed in builtin_exec_impl.isra.12 (locals=0x7fab71213b40, globals=0x7fab71213b40, source=0x7fab70049540) at /tmp/build/80754af9/python_1588882889832/work/Python/bltinmodule.c:1079
#61 builtin_exec () at /tmp/build/80754af9/python_1588882889832/work/Python/clinic/bltinmodule.c.h:283
#62 0x0000561809be9b19 in _PyMethodDef_RawFastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:655
#63 0x0000561809be9db1 in _PyCFunction_FastCallKeywords (func=0x7fab7129be10, args=<optimized out>, nargs=<optimized out>, kwnames=<optimized out>) at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:734
#64 0x0000561809c54e94 in call_function (kwnames=0x0, oparg=2, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4568
#65 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3124
#66 0x0000561809b992b9 in _PyEval_EvalCodeWithName () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3930
#67 0x0000561809be9435 in _PyFunction_FastCallKeywords () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:433
#68 0x0000561809c50be6 in call_function (kwnames=0x0, oparg=<optimized out>, pp_stack=<synthetic pointer>) at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:4616
#69 _PyEval_EvalFrameDefault () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3124
#70 0x0000561809b992b9 in _PyEval_EvalCodeWithName () at /tmp/build/80754af9/python_1588882889832/work/Python/ceval.c:3930
#71 0x0000561809b9a3e5 in _PyFunction_FastCallDict () at /tmp/build/80754af9/python_1588882889832/work/Objects/call.c:376
#72 0x0000561809ca8ce7 in pymain_run_module () at /tmp/build/80754af9/python_1588882889832/work/Modules/main.c:355
#73 0x0000561809cbb60b in pymain_run_python (pymain=0x7ffedcc27390) at /tmp/build/80754af9/python_1588882889832/work/Modules/main.c:2899
#74 pymain_main () at /tmp/build/80754af9/python_1588882889832/work/Modules/main.c:3442
#75 0x0000561809cbb6fc in _Py_UnixMain () at /tmp/build/80754af9/python_1588882889832/work/Modules/main.c:3477
#76 0x00007fab70accb97 in __libc_start_main (main=0x561809b7a3a0 <main>, argc=7, argv=0x7ffedcc274e8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffedcc274d8) at ../csu/libc-start.c:310
#77 0x0000561809c603c0 in _start () at ../sysdeps/x86_64/elf/start.S:103
Here is the thread info:
Id Target Id Frame
* 1 Thread 0x7fab712d5740 (LWP 1054) "python3" 0x00007ffedccbab44 in clock_gettime ()
17 Thread 0x7fab4c36d700 (LWP 1936) "jemalloc_bg_thd" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>,
expected=0, futex_word=0x7faaf300a5f4) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
18 Thread 0x7fab4eb6e700 (LWP 1937) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
19 Thread 0x7fab5136f700 (LWP 1938) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
20 Thread 0x7fab51b70700 (LWP 1939) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
21 Thread 0x7faaf22a5700 (LWP 1940) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
22 Thread 0x7faaf1aa4700 (LWP 1941) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
23 Thread 0x7faaf12a3700 (LWP 1942) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
24 Thread 0x7faaf0aa2700 (LWP 1943) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
25 Thread 0x7faaebfff700 (LWP 1944) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
26 Thread 0x7faaeb7fe700 (LWP 1945) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
27 Thread 0x7faaeaffd700 (LWP 1946) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
28 Thread 0x7faaea7fc700 (LWP 1947) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
29 Thread 0x7faae9ffb700 (LWP 1948) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
30 Thread 0x7faae97fa700 (LWP 1949) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
31 Thread 0x7faae8ff9700 (LWP 1950) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
32 Thread 0x7faae87f8700 (LWP 1951) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
33 Thread 0x7faae7ff7700 (LWP 1952) "python3" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>, expected=0,
futex_word=0x56180c828b40) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
34 Thread 0x7faae71ff700 (LWP 1953) "jemalloc_bg_thd" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>,
expected=0, futex_word=0x7faaf300a6c4) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
35 Thread 0x7faae61ff700 (LWP 1954) "jemalloc_bg_thd" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>,
expected=0, futex_word=0x7faaf300a790) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
36 Thread 0x7faae4fff700 (LWP 1955) "jemalloc_bg_thd" 0x00007fab70ea99f3 in futex_wait_cancelable (private=<optimized out>,
expected=0, futex_word=0x7faaf300a864) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
39 Thread 0x7faad0dcb700 (LWP 2819) "python3" 0x00007fab70bce237 in accept4 (fd=10, addr=..., addr_len=0x7faad0dcadf8,
flags=524288) at ../sysdeps/unix/sysv/linux/accept4.c:32
40 Thread 0x7fa942fff700 (LWP 2820) "python3" 0x00007fab70bbfbf9 in __GI___poll (fds=0x7fa8c0000bd0, nfds=10, timeout=100)
at ../sysdeps/unix/sysv/linux/poll.c:29
Does that give us anything useful?
I'm still working on a pared-down version so I can post some code.