When I try to run PyTorch using the latest Docker image (`nvcr.io/nvidia/pytorch:22.08-py3`), it fails on `import torch`
with the following stack trace:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/mnt/home/.local/lib/python3.8/site-packages/torch/__init__.py", line 811, in <module>
from .functional import * # noqa: F403
File "/mnt/home/.local/lib/python3.8/site-packages/torch/functional.py", line 7, in <module>
import torch.nn.functional as F
File "/mnt/home/.local/lib/python3.8/site-packages/torch/nn/__init__.py", line 1, in <module>
from .modules import * # noqa: F403
File "/mnt/home/.local/lib/python3.8/site-packages/torch/nn/modules/__init__.py", line 2, in <module>
from .linear import Identity, Linear, Bilinear, LazyLinear
File "/mnt/home/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 7, in <module>
from .. import functional as F
File "/mnt/home/.local/lib/python3.8/site-packages/torch/nn/functional.py", line 18, in <module>
from .._jit_internal import boolean_dispatch, _overload, BroadcastingList1, BroadcastingList2, BroadcastingList3
File "/mnt/home/.local/lib/python3.8/site-packages/torch/_jit_internal.py", line 25, in <module>
import torch.distributed.rpc
File "/mnt/home/.local/lib/python3.8/site-packages/torch/distributed/__init__.py", line 55, in <module>
from .distributed_c10d import * # noqa: F403
File "/mnt/home/.local/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py", line 188, in <module>
reduce_op = _reduce_op()
File "/mnt/home/.local/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py", line 176, in __init__
for k, v in ReduceOp.__members__.items():
AttributeError: type object 'torch._C._distributed_c10d.ReduceOp' has no attribute '__members__'
Any ideas?