Batch_norm causes RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

I trained an HRFormer model on a GPU without any errors. However, running inference with this model produced the following error message:

Traceback (most recent call last):
  File "demo/image_demo.py", line 105, in <module>
    main()
  File "demo/image_demo.py", line 85, in main
    batch_results = inference_topdown(model, args.img)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/apis/inference.py", line 192, in inference_topdown
    results = model.test_step(batch)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 145, in test_step
    return self._run_forward(data, mode='predict')  # type: ignore
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 326, in _run_forward
    results = self(**data, mode=mode)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/base.py", line 142, in forward
    return self.predict(inputs, data_samples)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/topdown.py", line 103, in predict
    _feats = self.extract_feat(inputs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/base.py", line 188, in extract_feat
    x = self.backbone(inputs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrnet.py", line 583, in forward
    y_list = self.stage2(x_list)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/container.py", line 227, in forward
    input = module(input)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrnet.py", line 200, in forward
    x[i] = self.branches[i](x[i])
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/container.py", line 227, in forward
    input = module(input)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrformer.py", line 387, in forward
    x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrformer.py", line 313, in forward
    x = layer(x)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 741, in forward
    return F.batch_norm(
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/functional.py", line 2471, in batch_norm
    return torch.batch_norm(

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument running_var in method wrapper_CUDA__cudnn_batch_norm)

The error says that the tensors passed to the batch_norm function in torch.nn.functional are not all on the same device. I inspected these tensors by adding the following to the batch_norm function:

....
def batch_norm(
    input: Tensor,
    running_mean: Optional[Tensor],
    running_var: Optional[Tensor],
    weight: Optional[Tensor] = None,
    bias: Optional[Tensor] = None,
    training: bool = False,
    momentum: float = 0.1,
    eps: float = 1e-5,
) -> Tensor:
    r"""Applies Batch Normalization for each channel across a batch of data.

    See :class:`~torch.nn.BatchNorm1d`, :class:`~torch.nn.BatchNorm2d`,
    :class:`~torch.nn.BatchNorm3d` for details.

    This copy also prints the device of each inspected argument that is a
    tensor before dispatching to ``torch.batch_norm`` (temporary debugging
    aid; the normalization result itself is not affected by the prints).
    """
    if has_torch_function_variadic(input, running_mean, running_var, weight, bias):
        return handle_torch_function(
            batch_norm,
            (input, running_mean, running_var, weight, bias),
            input,
            running_mean,
            running_var,
            weight=weight,
            bias=bias,
            training=training,
            momentum=momentum,
            eps=eps,
        )
    if training:
        _verify_batch_size(input.size())

    # --- What I added ---
    # Print the device of every inspected argument that is a tensor.
    # Non-tensor values (e.g. None, bool, float) are skipped silently.
    inspected = (
        ('input is on        ', input),
        ('weight is on       ', weight),
        ('bias is on         ', bias),
        ('running_mean is on ', running_mean),  # ---> 'running mean' is on CPU!
        ('training is on     ', training),
        ('momentum is on     ', momentum),
        ('eps is on          ', eps),
    )
    for label, value in inspected:
        if torch.is_tensor(value):
            print(label, value.device)

    return torch.batch_norm(
        input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
    )

and found that the variable running_mean is on the CPU for some reason. I tried to move it to the GPU by adding the lines below to batch_norm:

# Attempted workaround (pasted inside batch_norm): if running_mean is not
# already a CUDA tensor, rebind the local name to a copy moved to 'cuda'.
if not running_mean.is_cuda:
    running_mean = running_mean.to('cuda')

Now every tensor I print is reported to be on cuda:0, but the error still remains:

input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
input is on         cuda:0
weight is on        cuda:0
bias is on          cuda:0
running_mean is on  cuda:0
Traceback (most recent call last):
  File "demo/image_demo.py", line 105, in <module>
    main()
  File "demo/image_demo.py", line 85, in main
    batch_results = inference_topdown(model, args.img)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/apis/inference.py", line 192, in inference_topdown
    results = model.test_step(batch)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 145, in test_step
    return self._run_forward(data, mode='predict')  # type: ignore
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 326, in _run_forward
    results = self(**data, mode=mode)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/base.py", line 142, in forward
    return self.predict(inputs, data_samples)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/topdown.py", line 103, in predict
    _feats = self.extract_feat(inputs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/pose_estimators/base.py", line 188, in extract_feat
    x = self.backbone(inputs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrnet.py", line 583, in forward
    y_list = self.stage2(x_list)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/container.py", line 227, in forward
    input = module(input)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrnet.py", line 200, in forward
    x[i] = self.branches[i](x[i])
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/container.py", line 227, in forward
    input = module(input)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrformer.py", line 387, in forward
    x = x + self.drop_path(self.ffn(self.norm2(x), H, W))
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/dkobayas/Pose-Estimation/mmpose/mmpose/models/backbones/hrformer.py", line 313, in forward
    x = layer(x)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 741, in forward
    return F.batch_norm(
  File "/depot/cfrueh/apps/env_mmpose_nospyder/lib/python3.8/site-packages/torch/nn/functional.py", line 2471, in batch_norm
    return torch.batch_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument running_var in method wrapper_CUDA__cudnn_batch_norm)

I have no idea how to fix this issue. Any advice would be helpful!

The error message seems to point to the running_var variable. Can you check that one too?

Thank you for pointing that out! It turns out that running_var is also on the CPU for some reason. After moving it to the GPU, the problem was resolved!