I want to iterate repeatedly over the file loaded by the `__getitem__` function, because each file contains many training samples (a single file can contain up to 200 training samples).
Do I have to create a subset of this dataset? How is that done, and what is the best practice?
def __len__(self):
    """Return the number of MIDI files in ``self.all_files`` (one item per file)."""
    return len(self.all_files)


def __getitem__(self, idx):
    """Load the MIDI file at ``idx`` and return one random, non-silent window.

    The file is parsed into a stacked pianoroll, reduced to a single track,
    optionally binarized, and a window of ``self.seq_length`` ticks is drawn
    at random until one containing at least one note is found.

    Args:
        idx: Index into ``self.all_files``.

    Returns:
        The sampled window after ``transposeNotesHigherLower`` and
        ``cutOctaves``, with a new leading axis of size 1.
        NOTE(review): the original comment says the pre-expansion shape is
        [96, 60]; that depends on ``seq_length`` and ``cutOctaves`` -- confirm.

    Raises:
        ValueError: If the file is too short to sample a window from, or if
            no window that can be sampled contains a note (the rejection loop
            would otherwise never terminate).
    """
    # Load the track from the MIDI file.
    track = ppr.Multitrack(self.all_files[idx], beat_resolution=self.beat_res)
    track = track.get_stacked_pianoroll()

    # The code below indexes the stacked pianoroll as [tick, pitch, track].
    if track.shape[2] == 1:
        # Single-track MIDI file: drop the trailing track axis.
        track = np.squeeze(track, 2)
    else:
        # Quick fix for multitrack files: the melody is on track 0 in almost
        # every song, so keep only that track.
        track = track[:, :, 0]

    # Binarize: any non-zero velocity becomes 1.
    if self.binarize:
        track[track > 0] = 1

    # Full track length in ticks (axis 0 is the one sliced below).
    n_ticks = track.shape[0]

    # Exclusive upper bound for the window start, same range as before.
    max_start = (n_ticks - 1) - self.seq_length
    if max_start <= 0:
        raise ValueError(
            "track {!r} has {} ticks, too short for seq_length={}".format(
                self.all_files[idx], n_ticks, self.seq_length
            )
        )

    # Starts are drawn from [0, max_start), so windows only ever cover ticks
    # [0, max_start - 1 + seq_length). If that region is silent the rejection
    # loop below could never exit, so fail loudly instead of hanging.
    if not np.any(track[: max_start - 1 + self.seq_length]):
        raise ValueError(
            "track {!r} contains no notes in the sampled region".format(
                self.all_files[idx]
            )
        )

    while True:
        # Get a random (bar-long) sequence from the given MIDI file.
        start = np.random.randint(0, max_start)
        sequence = track[start:start + self.seq_length, :]
        # Only accept the sequence if it is not zero for all ticks.
        if np.any(sequence):
            break

    # Transpose notes that fall outside the 5 chosen octaves.
    sequence = transposeNotesHigherLower(sequence)
    # Cut octaves down to the model's input pitch range.
    sequence = cutOctaves(sequence)
    # Add a leading axis of size 1 for the model input.
    sequence = np.expand_dims(sequence, axis=0)
    return sequence