I want to iterate repeatedly over the file loaded by the `__getitem__` function, because each file contains many training samples (one file can contain up to 200 training samples).
Do I have to create a subset of this dataset? How is that done, and what is the best practice?
Thanks.
def __len__(self):
    """Return the number of entries in ``self.all_files``."""
    file_count = len(self.all_files)
    return file_count
def __getitem__(self, idx):
    """Return one randomly positioned, non-silent sequence from a MIDI file.

    The file ``self.all_files[idx]`` is loaded, reduced to a single track
    and, when ``self.binarize`` is set, binarized. A window of
    ``self.seq_length`` ticks is then drawn at a random offset and redrawn
    until it contains at least one non-zero entry. The offset is random,
    so asking for the same ``idx`` again generally yields a different
    window of the same file.

    Args:
        idx: Index into ``self.all_files``.

    Returns:
        The selected window after ``transposeNotesHigherLower`` and
        ``cutOctaves``, with a leading axis of size 1 added.

    Raises:
        ValueError: If ``self.seq_length`` is not positive, if the track
            is shorter than ``self.seq_length`` ticks, or if the track
            contains no non-zero entry (no window could ever be accepted).
    """
    # Load the track from the MIDI file.
    multitrack = ppr.Multitrack(self.all_files[idx], beat_resolution=self.beat_res)
    track = multitrack.get_stacked_pianoroll()

    if track.shape[2] == 1:
        # Single-track MIDI file: drop the track axis.
        track = np.squeeze(track, 2)
    else:
        # Quick fix for multitrack files: the melody is on track 0 in
        # almost every song.
        track = track[:, :, 0]

    if self.binarize:
        track[track > 0] = 1

    # Full track length in ticks.
    length = track.shape[0]
    seq_length = self.seq_length
    # Largest start offset that still leaves room for a full window.
    last_start = length - seq_length

    if seq_length < 1 or last_start < 0:
        raise ValueError(
            "cannot cut a sequence of {} ticks from {!r} ({} ticks long)".format(
                seq_length, self.all_files[idx], length
            )
        )
    if not np.any(track):
        # Without this guard the rejection loop below would never end.
        raise ValueError(
            "track in {!r} contains no notes".format(self.all_files[idx])
        )

    while True:
        # randint's upper bound is exclusive, hence the +1: every window,
        # including the very last one, must be reachable so that any
        # non-zero tick is covered by at least one candidate window.
        start = np.random.randint(0, last_start + 1)
        sequence = track[start:start + seq_length, :]
        # Only accept the window if it is not zero for all ticks.
        if np.any(sequence):
            break

    # Transpose notes out of range of the 5 chosen octaves.
    sequence = transposeNotesHigherLower(sequence)
    # Cut octaves to get input shape [96, 60].
    sequence = cutOctaves(sequence)
    # Add a leading axis for the model input.
    return np.expand_dims(sequence, axis=0)