DataParallel with custom GPU numbers

I want to use `torch.nn.DataParallel` with the following code, but I am confused about how to implement it. `CUDA_VISIBLE_DEVICES` is set to 0 (a single GPU) in a separate file.
I will pass the GPU IDs on the command line according to availability, e.g. `--gpu 4 5 6`.

I am facing the following error:
AttributeError: module 'torchvision.models' has no attribute 'to'

# Excerpt of the training script: builds the train/test data loaders, wraps
# `models` in DataParallel, then loops CYCLES times, creating a fresh backbone
# (and, for method == 'lloss', a loss module) on every iteration.
parser = argparse.ArgumentParser()
# The individual parser arguments are omitted from this snippet.
if __name__ == '__main__':
     # Training loader: drop_last=True drops the final incomplete batch and
     # pin_memory=True asks the loader to return batches in pinned host memory.
     train_loader = DataLoader(data_train, batch_size=BATCH, 
                                    pin_memory=True, drop_last=True)
     test_loader  = DataLoader(data_test, batch_size=BATCH)
     dataloaders  = {'train': train_loader, 'test': test_loader}
     # DataParallel: args.gpus is passed positionally as the device_ids argument.
     # NOTE(review): `models` is not assigned anywhere above this line in the
     # snippet; the dict of networks is only built further down, inside the loop.
     # The reported AttributeError mentions `torchvision.models`, so presumably
     # `models` still refers to that imported module here, while DataParallel
     # expects an nn.Module instance -- confirm against the file's imports.
     models = torch.nn.DataParallel(models , args.gpus).cuda()
     for cycle in range(CYCLES):
            # Randomly sample 10000 unlabeled data points
            # NOTE(review): only the slice below is visible; any shuffling of
            # unlabeled_set happens outside this snippet -- TODO confirm.
            # NOTE(review): the condition of this `if` (and its colon) is
            # missing, so the snippet as shown does not parse.
            if not
                subset = unlabeled_set[:SUBSET]   #SubSet=10,000
                #num = sys.getsizeof(subset)
                #print(num)   #80056

            # Model - create new instance for every cycle so that it resets
            # The context manager selects the current CUDA device for the
            # .cuda() calls below. CUDA_VISIBLE_DEVICES is used as a Python name
            # here and is not defined in this snippet.
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                if args.dataset == "fashionmnist":
                    resnet18    = resnet.ResNet18fm(num_classes=NO_CLASSES).cuda()
                    #resnet18    = vgg11().cuda() 
                    # NOTE(review): this second assignment sits in the same
                    # branch and immediately replaces the ResNet18fm instance
                    # created above; an `else:` may be missing. For any other
                    # dataset no `resnet18` is assigned in the visible code.
                    resnet18    = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
                if method == 'lloss':
                    #loss_module = LossNet(feature_sizes=[16,8,4,2], num_channels=[128,128,256,512]).cuda()
                    loss_module = LossNet().cuda()

            # Rebinds `models` to a plain dict of the freshly created networks,
            # discarding the DataParallel wrapper assigned before the loop; the
            # networks stored here are not wrapped in DataParallel.
            models      = {'backbone': resnet18}
            if method =='lloss':
                models = {'backbone': resnet18, 'module': loss_module}
            # NOTE(review): exact duplicate of the check above; it rebuilds the
            # same dict a second time.
            if method =='lloss':
                    models = {'backbone': resnet18, 'module': loss_module}

            # Enables cuDNN benchmark mode; this is re-set on every iteration.
            torch.backends.cudnn.benchmark = True