Loading the entire dataset onto the GPU gives an error

Hi all, I am trying to load the entire dataset onto the GPU this way:

class CustomDataset(Dataset):
    """Dataset that keeps all laser inputs and labels as float32 tensors on `device`.

    Both arrays are converted once in the constructor, so `__getitem__` is a
    plain tensor index with no per-sample host-to-device copy.

    When `device` is a CUDA device, iterate this dataset with
    `DataLoader(..., num_workers=0)`: indexing the CUDA tensors from a
    DataLoader worker process fails with "CUDA error: initialization error".

    Args:
        laser_in: array-like of input samples, indexed along the first axis.
        tf_label_in: array-like of labels, indexed along the first axis.
        transform_in: optional input transform; stored, not applied by
            `__getitem__`.
        target_transform_in: optional label transform; stored, not applied by
            `__getitem__`.
    """

    def __init__(self, laser_in,tf_label_in, transform_in=None, target_transform_in=None):
        # Create the tensors directly on the target device instead of
        # building them on the CPU and copying afterwards.
        self.laser              = torch.tensor(laser_in, dtype=torch.float32, device=device)
        self.tf_label           = torch.tensor(tf_label_in, dtype=torch.float32, device=device)
        self.transform          = transform_in
        self.target_transform   = target_transform_in
        self.outputs            = []

    def __len__(self):
        """Return the number of samples (one per label row)."""
        # The previous `len(...) - 1` silently dropped the last sample.
        return len(self.tf_label)

    def __getitem__(self, idx):
        """Return the `(laser, label)` pair stored at position `idx`."""
        return self.laser[idx], self.tf_label[idx]

# Build the full dataset; the laser scans are cast to float32 before wrapping.
set_complete = CustomDataset(laser_array.astype(np.float32), tf_array)

# 80 % of the samples go to training, the remainder to testing.
train_size = int(0.8 * len(set_complete))
test_size = len(set_complete) - train_size
train_set, test_set = random_split(set_complete, [train_size, test_size])

batch_size_train = 256

# Both loaders shuffle and fetch batches in one persistent worker process,
# without pinned memory.
# NOTE(review): with the dataset tensors on a CUDA device, fetching in a worker
# process is what raises the reported "CUDA error: initialization error";
# num_workers=0 together with persistent_workers=False avoids it.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size_train,
    shuffle=True,
    num_workers=1,
    pin_memory=False,
    persistent_workers=True,
)
test_loader = DataLoader(
    test_set,
    batch_size=128,
    shuffle=True,
    num_workers=1,
    pin_memory=False,
    persistent_workers=True,
)

But when I execute it, it generates the following error:

  File "pre_train_model.py", line 112, in <module>
    for i, data in enumerate(train_loader):
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 530, in __next__
    data = self._next_data()
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1224, in _next_data
    return self._process_data(data)
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1250, in _process_data
    data.reraise()
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/_utils.py", line 457, in reraise
    raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/mutti/.local/lib/python3.8/site-packages/torch/utils/data/dataset.py", line 471, in __getitem__
    return self.dataset[self.indices[idx]]
  File "pre_train_model.py", line 49, in __getitem__
    return self.laser[idx], self.tf_label[idx]
RuntimeError: CUDA error: initialization error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

What am I doing wrong?
Thanks

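Pass `num_workers=0` to the DataLoader.
Multiprocessing doesn't work well with CUDA: your dataset tensors already live on the GPU, and a DataLoader worker subprocess cannot initialize CUDA again to index them, which is what raises the "CUDA error: initialization error".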

It works!

num_workers=0,pin_memory=False,persistent_workers=False

Thanks