Bottleneck in dataloader with torchaudio

This is my first post here, so it might be a noob question.
I’ve been running into an issue where my dataloader, which reads audio files from disk, is a bottleneck: the GPU is stuck at about 50% utilization. Looking at the CPU cores with htop, the data loading only keeps about 2.5 to 3 of my CPU’s 12 threads busy. I’ve also confirmed that disk reads are only about 10 MB/s, which is far below the top speed of my SSD.

In case it matters, I compiled torchaudio myself on Arch Linux with FFmpeg support. My CPU is a Ryzen 5 with 6 cores and 12 threads.

class AudioFolderDataset(Dataset):
    def __init__(self, root,
                 randomize=True, class_to_idx=None,
        # NOTE(review): the signature is cut off here in the post (no closing
        # parenthesis). The body reads randomize_prob, target_length_sec and
        # target_sample_rate, so presumably they were further parameters --
        # confirm names and defaults against the original file.
        self.root = root
        # Class names are the entries of `root`, sorted so the auto-generated
        # indices below are stable between runs.
        classes = sorted(os.listdir(root))
        # A caller-supplied mapping wins; a falsy one (None or an empty dict)
        # falls back to the mapping derived from the directory listing.
        self.class_to_idx = class_to_idx or {cls: i for i, cls in enumerate(classes)}
        # _make_dataset() reads self.root and self.class_to_idx, so it has to
        # run after both have been assigned.
        self.samples = self._make_dataset()
        self.randomize_prob = randomize_prob
        self.randomize = randomize
        self.target_length_sec = target_length_sec
        self.target_sample_rate = target_sample_rate

    def _make_dataset(self):
        samples = []
        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(self.root, class_name)
            for subdir in os.listdir(class_path):
                sub_path = os.path.join(class_path, subdir)
                for file_name in os.listdir(sub_path):
                    path = os.path.join(sub_path, file_name)
                    item = (path, class_idx)
        return samples

    def __len__(self):
        return len(self.samples)

    def get_labels(self):
        return [s[1] for s in self.samples]

    def get_random(self):
        if not hasattr(self, '_rand_gen'):
            self._rand_gen = random.Random()
        return self._rand_gen

    def __getitem__(self, idx):
        # Loads the audio file for sample `idx`, resamples it to
        # self.target_sample_rate when the file's rate differs, and returns
        # (waveform, label, path).
        path, label = self.samples[idx]
        rand_gen = self.get_random()
        # Augmentation is enabled for this item only when self.randomize is
        # set and the random draw falls below self.randomize_prob.
        # NOTE(review): `randomize` is not used again in the visible code --
        # the lines that consumed it look to be missing from the post.
        randomize = self.randomize and (rand_gen.random() < self.randomize_prob)
        #ainfo = info(path, backend='ffmpeg')
        offset = 0
        # NOTE(review): bare `target_sample_rate` is not defined in this
        # method; presumably `self.target_sample_rate` was intended -- as
        # written this raises NameError unless a global of that name exists.
        target_length = target_sample_rate * self.target_length_sec
        # NOTE(review): the load(...) call is cut off in the post; its
        # arguments (and any use of `offset` / `target_length`) are not
        # visible here.
        waveform, sample_rate = load(
        if sample_rate != self.target_sample_rate:
            # A new Resample transform is constructed on every call.
            # NOTE(review): torchaudio documents that the transform
            # precomputes its kernel for reuse, so building one per item
            # likely wastes CPU time -- worth profiling as a candidate for
            # the bottleneck, and caching one transform per source rate.
            waveform = Resample(sample_rate, self.target_sample_rate)(waveform)
            sample_rate = self.target_sample_rate
        # NOTE(review): bare `target_length_sec` has the same missing-`self.`
        # problem as above, and `target` is never used in the visible code;
        # presumably the pad/crop step that consumed it was dropped.
        target = sample_rate*target_length_sec
        return waveform, label, path
# Build the training dataset from 'data/train/' and wrap it in a shuffling
# DataLoader.
# NOTE(review): both calls below are missing their closing parentheses in the
# post, and `batch_size` is not defined in the visible snippet.
train_dataset = AudioFolderDataset(
        'data/train/', randomize=True
train_dataloader = DataLoader(
    # num_workers=12 matches the 12 hardware threads mentioned in the post.
    # NOTE(review): if each worker also runs multi-threaded torch ops, the
    # workers may oversubscribe the CPU -- worth checking when profiling.
    train_dataset, batch_size=batch_size, num_workers=12, shuffle=True, pin_memory=True

Thanks in advance