Hi everyone, I'm trying to create the train, test, and evaluation datasets with SubsetRandomSampler, but unfortunately I don't understand how it actually works. Could anyone please help me?
# Paths to the annotation CSV and the directory containing the .wav files.
# NOTE: the original used curly "smart quotes" (a copy-paste artifact), which
# is a SyntaxError in Python — replaced with straight quotes.
csv_file = 'AudioData/iden_split2.csv'
root_dir = 'AudioData/wav/'
class SpeakerRecognitionDataset(Dataset):
    """Map-style dataset of speaker audio files listed in an annotation CSV.

    Each row of the CSV names one utterance; column index 1 is the wav file
    path relative to ``root_dir``. ``__getitem__`` loads the waveform with
    torchaudio and applies the optional transform.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the sounds.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # FIX: was ``def init`` (underscores lost in a copy-paste), so the
        # constructor never ran and no attributes were ever set.
        # FIX: sep used a curly quote (SyntaxError); the CSV is space-separated.
        self.speaker_sound = pd.read_csv(csv_file, sep=' ')
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        # One row per sound file.
        return len(self.speaker_sound)

    def __getitem__(self, idx):
        # Column 1 of the annotation row holds the relative wav path.
        sound_file_name = os.path.join(self.root_dir,
                                       self.speaker_sound.iloc[idx, 1])
        # Fail loudly on a missing file: the original fell through and
        # returned None, which crashes DataLoader batch collation with a
        # confusing error far from the real cause.
        if not os.path.isfile(sound_file_name):
            raise FileNotFoundError(sound_file_name)
        waveform, sample_rate = torchaudio.load(sound_file_name)
        if self.transform:
            waveform = self.transform(waveform)
        return waveform
# Build the dataset from the module-level paths, then wrap it in a loader.
speaker = SpeakerRecognitionDataset(csv_file=csv_file, root_dir=root_dir)

# shuffle=True randomizes sample order each epoch; pin_memory speeds up
# host-to-GPU transfer when training on CUDA.
dataloader = DataLoader(
    speaker,
    batch_size=10,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)