I have created a custom dataset with `__getitem__`
and `__len__`
methods, but calling the `map`
method on an instance of the dataset raises an error saying that the dataset has no attribute `map`.
Here is the dataset:
class trainingDataset(Dataset):
    """Index-addressable dataset that serves training audio samples.

    Both the length and the items are delegated to an ``audioWorker``
    instance whose ``set`` attribute is assigned ``"training"``.

    Only ``__len__`` and ``__getitem__`` are defined on this class; it
    does not define a ``map`` method itself.

    NOTE(review): ``Dataset`` is presumably ``torch.utils.data.Dataset``
    (the import is not shown) -- confirm.
    """

    def __init__(self):
        super().__init__()
        # The worker is the single source of samples for this dataset.
        self.trainingAudio = audioWorker()
        # Presumably selects which split the worker reads from -- confirm
        # against the audioWorker implementation.
        self.trainingAudio.set = "training"

    def __len__(self):
        """Return the worker's ``vocabLen`` as the number of items."""
        return self.trainingAudio.vocabLen

    def __getitem__(self, idx):
        """Return whatever the worker's ``getAudio`` yields for ``idx``."""
        return self.trainingAudio.getAudio(idx)
And here is what I am trying to do:
def prepare_dataset(batch):
    """Add model inputs and label ids to one example and return it.

    Reads ``batch["audio"]`` (its ``"array"`` and ``"sampling_rate"``
    entries) and ``batch["sentence"]``; writes ``"input_values"``,
    ``"input_length"`` and ``"labels"`` into the same mapping.

    Relies on a module-level ``processor`` that is not shown here.

    Args:
        batch: Mapping holding the ``"audio"`` and ``"sentence"`` entries.

    Returns:
        The same ``batch`` object, mutated in place.
    """
    audio = batch["audio"]

    # batched output is "un-batched"
    batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
    batch["input_length"] = len(batch["input_values"])

    # NOTE(review): `as_target_processor` is, as far as I know, deprecated in
    # recent `transformers` releases in favour of `processor(text=...)` --
    # confirm against the installed version before changing it.
    with processor.as_target_processor():
        batch["labels"] = processor(batch["sentence"]).input_ids
    return batch
# Apply `prepare_dataset` through the object's `.map` and rebind the name to
# the result.
# NOTE(review): a `.map` taking a per-example function matches the Hugging Face
# `datasets.Dataset` API; a class that only defines `__len__`/`__getitem__`
# has no such method -- confirm the actual type of `testDataset`.
testDataset = testDataset.map(prepare_dataset)
AttributeError: 'testDataset' object has no attribute 'map'
This looks really strange, because it was working fine before.