I am trying to implement some music generation LSTM, but cant figure out how to properly shape my data. In current configuration, when i try to train my model, it just crashes my colab notebook, instantly, and it doesnt look like it is because of ram shortage, colab doesnt tell me that
Some code:
Wav File dataset: takes a data file and samples a seq_length samples from it, chopping one sample off as a label, returning everything else as a time sequence of one feature
class WavFileDataset(Dataset):
def __init__(self, file_path, seq_length):
sequence = pydub.AudioSegment.from_file(file_path) #Load wav
sequence = sequence.set_channels(1) #Stereo to mono
sequence = np.asarray(sequence.get_array_of_samples()) #Get numpy ndarray
self.length = len(sequence) - seq_length #Get how many sequences we can take from wav file
self.seq_length = seq_length #Save length of sequence that we will sample later
self.file_path = file_path #Save file path for later
def __len__(self):
return self.length #Return how many sequences we can take, not much here
def __getitem__(self,idx):
#Again, open and convert wav to ndarray
sequence = pydub.AudioSegment.from_file(self.file_path)
sequence = sequence.set_channels(1)
sequence = np.asarray(sequence.get_array_of_samples())
seq = sequence[idx:idx+self.seq_length] #Get a sequence of length = seq_length starting and idx
seq = (seq / (1<<15)) #Normalize for int16 audio to -1...1 value range
feature = seq[0:-1].astype('float32').reshape(1,-1) #get seq_length-1 samples as time sequence of feature
label = seq[-1:None].astype('float32').reshape(1,-1) #get last item from sequence as a label
#return everything
return feature,label
LSTM Model class:
class LSTM(nn.Module):
def __init__(self, input_size=1, hidden_layer_size=100, output_size=1, batch_size_ = 128):
super().__init__()
self.hidden_layer_size = hidden_layer_size
self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first = False)
self.linear = nn.Linear(hidden_layer_size, output_size)
self.hidden_cell = (torch.zeros(1,batch_size_,self.hidden_layer_size),
torch.zeros(1,batch_size_,self.hidden_layer_size))
def forward(self, input_seq):
lstm_out, self.hidden_cell = self.lstm(input_seq, self.hidden_cell)
predictions = self.linear(lstm_out.view(len(input_seq), -1))
return predictions[-1]
Initialization code:
data_dir = r"./datasets/rammwav" #Dataset dir: a folder full of wav files
files = [os.path.join(data_dir,f) for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f))] #Get all files
files = [files[0]] #temporary sampling from only one file, for speeding up dataset creation
datasets = []
files_count = len(files)
seq_length = 100
batch_size = 2
i = 1
print("Processing datasets...")
for file in files: #Create WavFileDataset list to feed into ConcatDataset
datasets.append(WavFileDataset(file,seq_length))
print_inline("Processed {}/{}".format(i,files_count)) #print_inline - a helper function: prints on the same line using sys.stdout.write("\r" + input_string)
i += 1
print("\n Datasets processed")
#Create model-related stuff
model = LSTM(input_size = 1, hidden_layer_size = 100, output_size = 1, batch_size_ = batch_size).cuda()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#Create dataset and dataloader
dataset = torch.utils.data.ConcatDataset(datasets)
data_loader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
shuffle=True,
num_workers=0,
pin_memory=True,
drop_last=True)
#Some shape checking for testing purposes
test_batch = next(iter(data_loader))
print(test_batch[0].permute(2,0,1).shape)
Training loop:
epochs = 10
batches = len(data_loader)
losses = []
print("Starting training...")
for epoch in range(epochs): #iterate over epochs
batch = 1
for seq, labels in data_loader: #iterate over batches in data loader
starttime = time.time()
seq = seq.permute(2,0,1).cuda() #permute feature sequence batch to proper shape
labels = labels.permute(2,0,1).cuda() #permute label batch to proper shape
optimizer.zero_grad()
model.hidden_cell = (torch.zeros(1, batch_size, model.hidden_layer_size),
torch.zeros(1, batch_size, model.hidden_layer_size)) #reset hidden state
y_pred = model(seq)#make prediction
#Backprop and stuff:
single_loss = loss_function(y_pred, labels)
single_loss.backward()
optimizer.step()
print_inline("Batch {}/{} Time/batch: {:.2f}, Loss: ".format(batch,batches,time.time()-starttime, single_loss.item()))
batch += 1
losses.append(single_loss.item())
if i%5 == 1:
print("epoch: {}/{} loss:{}; Loss:{:.4f}".format(epoch,epochs,single_loss.item()))
I have no idea what is wrong, and why everything crashes, please help