Tracking a dataloader when loading large numpy arrays

Hi everyone

I am loading large NumPy arrays, each more than 1 GB in size; in total I have around 100,000 such files. The issue is that when I try to load them through the DataLoader, I do not get a progress bar. I wrapped the DataLoader in tqdm, but no progress bar is shown.
This is what I have done:

class CustomDataset(Dataset):
    """Paired source/target dataset backed by memory-mapped ``.npy`` files.

    Every file is opened with ``np.load(..., mmap_mode='r')`` in the
    constructor. The rows (first axis) of all source files are treated as one
    long sequence, and likewise for the target files; item ``i`` is the pair
    ``(i-th source row, i-th target row)``.

    Args:
        source_path: Iterable of paths to the source ``.npy`` files.
        target_path: Iterable of paths to the target ``.npy`` files.
        transform: Accepted for interface compatibility; it is not applied.
        progress: Optional callable that takes an iterable and returns an
            iterable over the same items (for example ``tqdm``). It wraps the
            file-opening loops so their progress can be observed, because all
            files are opened here, before any ``DataLoader`` iteration starts.

    Raises:
        ValueError: If the source and target files hold different numbers of
            rows in total.
    """

    def __init__(self, source_path, target_path, transform=None, progress=None):
        self.source_data = self._open_memmaps(source_path, progress)
        self.target_data = self._open_memmaps(target_path, progress)

        # Source and target files may be chunked differently, so each side
        # keeps its own table of starting row offsets.
        self.start_indices, self.data_count_source = self._start_offsets(
            self.source_data
        )
        self.target_start_indices, self.data_count_target = self._start_offsets(
            self.target_data
        )

        if self.data_count_source != self.data_count_target:
            raise ValueError(
                "source and target row counts differ: "
                f"{self.data_count_source} != {self.data_count_target}"
            )

    @staticmethod
    def _open_memmaps(paths, progress):
        """Memory-map every file in *paths*, optionally wrapped by *progress*."""
        paths = list(paths)
        iterable = paths if progress is None else progress(paths)
        # NOTE(review): allow_pickle=True will unpickle a file that is not in
        # .npy format; only point this at trusted data.
        return [np.load(path, allow_pickle=True, mmap_mode='r') for path in iterable]

    @staticmethod
    def _start_offsets(memmaps):
        """Return (starting row offset of each array, total row count)."""
        offsets = []
        total = 0
        for memmap in memmaps:
            offsets.append(total)
            total += memmap.shape[0]
        return offsets, total

    @staticmethod
    def _row(memmaps, offsets, index):
        """Return global row *index* from the concatenation of *memmaps*."""
        # bisect gives the insertion point to the right of equal entries, so
        # subtracting one yields the last array whose start offset <= index.
        which = bisect(offsets, index) - 1
        return memmaps[which][index - offsets[which]]

    def __len__(self):
        return self.data_count_source

    def __getitem__(self, index):
        """Return ``(source_row, target_row)`` for *index*.

        Negative indices count from the end.

        Raises:
            IndexError: If *index* is outside ``[-len(self), len(self))``.
        """
        size = self.data_count_source
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError(f"index out of range for dataset of length {size}")
        source_data = self._row(self.source_data, self.start_indices, index)
        target_data = self._row(self.target_data, self.target_start_indices, index)
        return source_data, target_data

# Build the training dataset from the paired source/target .npy files and
# iterate over it once with a progress bar.
root_dir = 'en-hi/'
source_path = os.path.join(root_dir, 'train', 'source')
target_path = os.path.join(root_dir, 'train', 'target')

# glob returns matches in arbitrary order; sort both lists so the i-th source
# file lines up with the i-th target file (assumes matching file names --
# TODO confirm).
train_source_file = sorted(glob.glob(os.path.join(source_path, '*.pt.npy')))
train_target_file = sorted(glob.glob(os.path.join(target_path, '*.pt.npy')))

# NOTE: CustomDataset opens every file inside its constructor, so the tqdm bar
# below cannot appear until this call has returned.
train_dataset = CustomDataset(train_source_file, train_target_file)
train_dataloader = DataLoader(train_dataset, batch_size=1, drop_last=False, num_workers=0)

pbar = tqdm(train_dataloader)
for d in pbar:
    pass

Is there any way to track the progress of loading the NumPy arrays? Thank you for any help.