I have a pickle file containing my training data in the format shown below:
'Ses01M_script03_1_F011': {'audio_data': array([ 97, 173, 139, ..., 74, 22, 56], dtype=int16),
'emo_label': 0,
'gen_label': 0,
'transcript': 'mmm',
'features': array([[-0.72736262, 0.43802561, 2.14846694, ..., -2.69805505,
-2.6575799 , -2.62012389],
[ 1.64373051, 1.45195623, 2.5100556 , ..., -3.49289518,
-3.33669469, -3.24162892],
[ 0.06487407, 2.2071517 , 2.77620356, ..., -4.97157829,
-5.32930986, -9.44662813],
...,
[ 1.9662066 , 2.45063788, 2.64235023, ..., -5.53699691,
-5.63285812, -8.2494293 ],
[ 2.46171323, 2.83857374, 2.91717236, ..., -5.23233626,
-6.12257485, -6.33155109],
[ 2.27569999, 2.81201242, 2.78668791, ..., -3.50327683,
-3.70423857, -3.75825296]])},
...}
So basically it's a dictionary. I want to load this information using a PyTorch DataLoader. For that, I want to create a SpeechDataGenerator class similar to the one given below:
import numpy as np
import torch
from utils import utils_wav
class SpeechDataGenerator():
    """Speech dataset driven by a plain-text manifest file.

    Every line of the manifest is split on single spaces and is expected
    to hold at least three fields:

    * field 0 -- link (path) to the audio file,
    * field 1 -- integer label stored in ``emo_labels``,
    * field 2 -- integer label stored in ``gen_labels``.

    The object supports ``len()`` and integer indexing, so it can be used
    as a map-style dataset.
    """

    def __init__(self, manifest, mode):
        """Read the manifest once and collect the audio links and labels.

        Args:
            manifest: Location of the text file describing the training
                data (one sample per line, see the class docstring).
            mode: Stored on the instance as ``self.mode``; it is not used
                anywhere else in this class.

        Raises:
            IndexError: If a line has fewer than three space-separated
                fields.
            ValueError: If the second or third field is not an integer.
        """
        self.mode = mode
        self.audio_links = []
        self.emo_labels = []
        self.gen_labels = []
        # Parse the file in a single pass inside a context manager so the
        # handle is always closed (previously the file was opened three
        # times and never closed explicitly).
        with open(manifest) as manifest_file:
            for line in manifest_file:
                fields = line.rstrip('\n').split(' ')
                self.audio_links.append(fields[0])
                self.emo_labels.append(int(fields[1]))
                self.gen_labels.append(int(fields[2]))

    def __len__(self):
        """Return the number of samples listed in the manifest."""
        return len(self.audio_links)

    def __getitem__(self, idx):
        """Load the audio for sample ``idx`` and pair it with its label.

        Args:
            idx: Index into the lists built from the manifest.

        Returns:
            A dict with two tensors: ``'raw_speech'`` (built from the
            output of ``utils_wav.load_data_wav``) and ``'labels'``
            (built from the ``emo_labels`` entry). ``gen_labels`` is not
            part of the returned sample.
        """
        audio_link = self.audio_links[idx]
        class_id = self.emo_labels[idx]
        # NOTE(review): load_data_wav is defined in utils.utils_wav, which
        # is not visible here; min_dur_sec=10 is presumably a minimum
        # duration in seconds -- confirm against that helper.
        audio_data = utils_wav.load_data_wav(audio_link, min_dur_sec=10)
        # np.ascontiguousarray promotes the scalar label to an array with
        # at least one dimension, which torch.from_numpy then wraps.
        sample = {
            'raw_speech': torch.from_numpy(np.ascontiguousarray(audio_data)),
            'labels': torch.from_numpy(np.ascontiguousarray(class_id)),
        }
        return sample
Can anyone please tell me how to modify this class so that it reads this pickle file, which contains a dictionary, and loads the features, emo_labels and gen_labels from it?
Seeking guidance.
Good day, all.