RuntimeError: Caught RuntimeError in replica 0 on device 0

I have eight different models in a project, two of which work fine on multiple GPUs without any error. When I integrated a new model, I got a replica error. The point is that I made a config file, and this file remains unchanged for both models. Config file:

import torch, os
import yaml
from IPython import embed

def get_config(args):
    """Build the training configuration dict from parsed CLI arguments.

    Expects ``args`` to provide: workers_id, epochs, batch_size, data_mode,
    net, head, target, resume, outdir.

    Returns a dict with seed, input/embedding sizes, device placement
    (CPU / single GPU / multi-GPU), dataset root, backbone/head names,
    evaluation targets, and checkpoint paths.
    """
    configuration = dict(
        SEED=1337,  # random seed for reproducible results
        INPUT_SIZE=[112, 112],  # support: [112, 112] and [224, 224]
        EMBEDDING_SIZE=512,  # feature dimension
    )

    # GPU selection: 'cpu' (or no CUDA) means an empty GPU list; otherwise
    # workers_id is a comma-separated list of device indices, e.g. "0,1".
    if args.workers_id == 'cpu' or not torch.cuda.is_available():
        configuration['GPU_ID'] = []
        print("check", args.workers_id, torch.cuda.is_available())
    else:
        configuration['GPU_ID'] = [int(i) for i in args.workers_id.split(',')]

    # Derive device placement and the multi-GPU flag from the GPU list.
    if len(configuration['GPU_ID']) == 0:
        configuration['DEVICE'] = torch.device('cpu')
        configuration['MULTI_GPU'] = False
    else:
        configuration['DEVICE'] = torch.device('cuda:%d' % configuration['GPU_ID'][0])
        if len(configuration['GPU_ID']) == 1:
            configuration['MULTI_GPU'] = False
        else:
            configuration['MULTI_GPU'] = True

    configuration['NUM_EPOCH'] = args.epochs
    configuration['BATCH_SIZE'] = args.batch_size

    # Dataset root lookup; unknown modes leave DATA_ROOT unset.
    if args.data_mode == 'casia':
        configuration['DATA_ROOT'] = '/media/khawar/HDD_Khawar/face_datasets/WIDER_train/images/'
    elif args.data_mode == "CelebA":
        configuration['DATA_ROOT'] = '/media/khawar/HDD_Khawar/face_datasets/CelebA'
    elif args.data_mode == "faces":
        configuration['DATA_ROOT'] = '/raid/khawar/dataset/faces/'
        # raise Exception(args.data_mode)

    configuration['EVAL_PATH'] = './eval/'
    # NOTE(review): prefer raising ValueError over assert for CLI validation —
    # asserts are stripped under `python -O`.
    assert args.net in ['VIT', 'VITs', 'VITs_Eff', 'CAiT', 'DeepViT', 'PiT', 'LeViT',
                        'Comb_ViT', 'Dino_VIT', 'CvT', 'Swin', 'T2TViT']
    configuration['BACKBONE_NAME'] = args.net
    assert args.head in ['Softmax', 'ArcFace', 'CosFace', 'SFaceLoss']
    configuration['HEAD_NAME'] = args.head
    # Evaluation targets are a comma-separated list, e.g. "lfw,cfp_fp".
    configuration['TARGET'] = [i for i in args.target.split(',')]

    if args.resume:
        configuration['BACKBONE_RESUME_ROOT'] = args.resume
    else:
        configuration['BACKBONE_RESUME_ROOT'] = ''  # the root to resume training from a saved checkpoint
    configuration['WORK_PATH'] = args.outdir  # the root to buffer your checkpoints
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    return configuration

# NOTE(review): `cfg` is undefined in this snippet — presumably the dict
# returned by `get_config(args)`; the assignment (e.g. `cfg = get_config(args)`)
# appears to have been dropped from the paste. Confirm against the full script.
GPU_ID = cfg['GPU_ID']  # specify your GPU ids
print('GPU_ID', GPU_ID)
Traceback (most recent call last):
  File "", line 367, in <module>
    outputs, emb = BACKBONE(inputs.float(), labels)
  File "/raid/khawar/.local/lib/python3.6/site-packages/torch/nn/modules/", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/raid/khawar/.local/lib/python3.6/site-packages/torch/nn/parallel/", line 167, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/raid/khawar/.local/lib/python3.6/site-packages/torch/nn/parallel/", line 177, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/raid/khawar/.local/lib/python3.6/site-packages/torch/nn/parallel/", line 86, in parallel_apply
  File "/raid/khawar/.local/lib/python3.6/site-packages/torch/", line 429, in reraise
    raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in replica 0 on device 0.