I’m building a model to classify the ‘difficulty’ of guitar music from MIDI files. During evaluation, my model consistently reaches poor accuracy (~37%), and during prediction it is consistently wrong (it often assigns the same grade to different pieces).
Below is the relevant code with my training and preprocessing:
def seq_prep(sequences):
    # print(sequences)
    network_input = []
    for sequence in sequences:
        translated_seq = []
        translated_seq.append([notes_to_num[item] for item in sequence])
        network_input.append(translated_seq[0])
    return network_input
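For context, notes_to_num, n_vocab, and max_length are built in an earlier step that isn’t shown; roughly something like this (a simplified sketch, where all_sequences is just a stand-in for the full list of note sequences parsed from the MIDI files):

# Build the note vocabulary from all extracted note sequences (simplified sketch).
all_notes = sorted(set(note for seq in all_sequences for note in seq))
notes_to_num = {note: i for i, note in enumerate(all_notes)}
n_vocab = len(all_notes)
max_length = max(len(seq) for seq in all_sequences)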
class DifficultyClassifer(nn.Module):
    def __init__(self, input_size, num_classes, hidden_size=384):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=7, batch_first=True,
                            dropout=0.4, bidirectional=True)
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        out, _ = self.lstm(x)
        # print(out.shape)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.relu3(out)
        out = self.fc4(out)
        return out
test = []

def split(sequences):
    sequences = seq_prep(sequences)
    # print(sequences)
    padded_sequences = pad_sequence([torch.tensor(seq) for seq in sequences],
                                    batch_first=True, padding_value=-1)
    padded_sequences = padded_sequences / n_vocab
    grades_tensor = torch.tensor(grades, dtype=torch.long)  # converts labels to a tensor
    train_sequences, test_sequences, train_grades, test_grades = train_test_split(
        padded_sequences, grades_tensor, test_size=0.01)
    train_loader = DataLoader(list(zip(train_sequences, train_grades)), shuffle=False, batch_size=batch_size)
    test_loader = DataLoader(list(zip(test_sequences, test_grades)), shuffle=False, batch_size=batch_size)
    return train_loader, test_loader
def train_network(train_loader):
    model = DifficultyClassifer(input_size=max_length, num_classes=9)
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
                                                     patience=25, verbose=True)
    num_epochs = 200
    start_time = time.time()
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")
        model.train()
        for batch, (input_batch, output_batch) in enumerate(train_loader):
            output = model(input_batch)
            output_batch = output_batch.view(-1)
            loss = criterion(output, output_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if batch % 25 == 0:
                print(f"Batch: {batch} - Loss: {loss.item():>7f}")
                losses.append(loss.detach().numpy())
        scheduler.step(loss)
    end_time = time.time() - start_time
    print(f"Time taken: {end_time}")
    torch.save(model.state_dict(), "full_difficulty_pred.pth")
def eval_network(val_loader):
    model = DifficultyClassifer(input_size=max_length, num_classes=9)
    model.eval()
    model.to(device)
    model_dict = torch.load('full_difficulty_pred.pth')
    model.load_state_dict(model_dict)
    total_correct = 0
    total_samples = 0
    all_true_labels = []
    all_predicted_labels = []
    with torch.no_grad():
        for val_batch, (val_input_batch, val_output_batch) in enumerate(val_loader):
            val_output = model(val_input_batch)
            print("--------------------\n")
            print(f"RAW VALID OUTPUT: {val_output}")
            val_output_batch = val_output_batch.view(-1)
            # print("EXPECTED GRADE: ", val_output_batch)
            _, predicted_class = torch.max(val_output, 1)
            # print(f"PREDICTED CLASS: {predicted_class.item()}")
            all_true_labels.extend(val_output_batch.cpu().numpy())
            all_predicted_labels.extend(predicted_class.cpu().numpy())
            # print(f"VAL TRUE CLASS : {val_output_batch}")
            # print(f"VAL PREDICTED CLASS : {predicted_class}")
            total_correct += (predicted_class == val_output_batch).sum().item()
            total_samples += val_output_batch.size(0)
    accuracy = accuracy_score(all_true_labels, all_predicted_labels)
    precision = precision_score(all_true_labels, all_predicted_labels, average='weighted')
    recall = recall_score(all_true_labels, all_predicted_labels, average='weighted')
    f1 = f1_score(all_true_labels, all_predicted_labels, average='weighted')
    print(f'Accuracy: {accuracy * 100:.2f}%')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1-Score: {f1:.2f}')
I’ve double-checked that the pre-processing of the training/validation data and the prediction data is the same, and I’ve checked my validation method to make sure it isn’t reporting a false accuracy. I’m not sure what the problem could be.
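For reference, this is roughly the kind of sanity check I’ve been running on the validation predictions (it assumes sklearn’s confusion_matrix and reuses the all_true_labels / all_predicted_labels lists collected in eval_network):

from collections import Counter
from sklearn.metrics import confusion_matrix

# Rows = true grade, columns = predicted grade. If the model collapses onto a
# few grades, most of the mass ends up in one or two columns.
cm = confusion_matrix(all_true_labels, all_predicted_labels, labels=list(range(9)))
print(cm)

# How often each grade is actually predicted.
print(Counter(all_predicted_labels))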
I made the dataset myself and it is not feasible for me to increase the amount of data any further. Each class has a balanced amount of data.
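(The balance claim is just based on counting the labels, along these lines, where grades is the same label list used in split():)

from collections import Counter

# Number of pieces per grade; the counts come out roughly even across the 9 grades.
print(Counter(grades))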
I’ve tried incrementally increasing the complexity of my model and training for longer, but to no avail, and now I’m not sure what else I can do to improve my results.
I would appreciate any help improving the accuracy of my model. Thank you!