I’m using a CNN to train a classifier on audio data. The data has 5 classes, and the the training loss and validation loss remain ~constant.
What am I doing wrong?
PARAM = {'LEARNING_RATE': 0.001, 'GRADIENT_CLIP': 20, 'EPOCH': 25}
class AudioCNN(nn.Module):
def __init__(self, out_channel, kernel_size, output_size, padding, input_size, batch_size, dropout):
super(AudioCNN, self).__init__()
self.out_channel = out_channel
self.kernel_size = kernel_size
self.output_size= output_size
self.padding = padding
self.input_size = input_size
self.batch_size = batch_size
self.conv1 = nn.Conv2d(in_channels=1, out_channels=out_channel, kernel_size=kernel_size, padding=padding)
self.pool = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(12960, 64)
self.fc2 = nn.Linear(64, output_size)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = x.view(self.batch_size, -1)
x = self.dropout(x)
x = F.relu(self.fc1(x))
x = self.dropout(x)
x = self.fc2(x)
return x
Training loop:
model = AudioCNN(out_channel=6, kernel_size=3, output_size=5, padding=1,
input_size=216, batch_size=256, dropout=0.25)
model.train()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
if torch.cuda.is_available():
model.cuda()
counter = 0
for e in range(epoch):
train_loss_list, val_loss_list, train_acc_list, val_acc_list = [], [], [], []
for train_inputs, train_labels in train_loader:
counter += 1
train_inputs = train_inputs.view(256, 1, -1, 216).float()
if torch.cuda.is_available():
train_inputs, train_labels = train_inputs.cuda(), train_labels.cuda()
model.zero_grad()
train_output = model(train_inputs)
_, train_pred = torch.max(torch.sigmoid(train_output), 1)
train_acc, train_pr, train_rc = _metric_summary(pred=train_pred.cpu().numpy(),
label=train_labels.cpu().numpy())
train_loss = criterion(torch.sigmoid(train_output), train_labels)
train_loss.backward()
optimizer.step()
train_loss_list.append(train_loss.item())
train_acc_list.append(train_acc)
nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
model.eval()
for val_inputs, val_labels in valid_loader:
if torch.cuda.is_available():
val_inputs, val_labels = val_inputs.cuda(), val_labels.cuda()
val_inputs = val_inputs.view(256, 1, -1, 216).float()
val_output = model(val_inputs)
val_loss = criterion(torch.sigmoid(val_output), val_labels)
val_loss_list.append(val_loss.item())
val_acc, val_pr, val_rc = _metric_summary(
pred=torch.max(val_output, dim=1).indices.data.cpu().numpy(),
label=val_labels.cpu().numpy(),
)
val_acc_list.append(val_acc)
Training log output
2020-11-14 20:45:42,795 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 1/25...Training Loss: 1.707...Validation Loss: 1.720...Train Accuracy: 0.198...Test Accuracy: 0.184
2020-11-14 20:45:46,995 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 2/25...Training Loss: 1.704...Validation Loss: 1.695...Train Accuracy: 0.199...Test Accuracy: 0.209
2020-11-14 20:45:50,795 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 3/25...Training Loss: 1.698...Validation Loss: 1.711...Train Accuracy: 0.206...Test Accuracy: 0.194
2020-11-14 20:45:54,538 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 4/25...Training Loss: 1.706...Validation Loss: 1.714...Train Accuracy: 0.198...Test Accuracy: 0.191
2020-11-14 20:45:58,110 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 5/25...Training Loss: 1.710...Validation Loss: 1.692...Train Accuracy: 0.196...Test Accuracy: 0.212
2020-11-14 20:46:01,948 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 6/25...Training Loss: 1.706...Validation Loss: 1.720...Train Accuracy: 0.199...Test Accuracy: 0.185
2020-11-14 20:46:05,794 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 7/25...Training Loss: 1.707...Validation Loss: 1.743...Train Accuracy: 0.200...Test Accuracy: 0.195
2020-11-14 20:46:09,611 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 8/25...Training Loss: 1.732...Validation Loss: 1.737...Train Accuracy: 0.201...Test Accuracy: 0.206
2020-11-14 20:46:13,316 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 9/25...Training Loss: 1.726...Validation Loss: 1.726...Train Accuracy: 0.203...Test Accuracy: 0.206
2020-11-14 20:46:17,773 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 10/25...Training Loss: 1.739...Validation Loss: 1.740...Train Accuracy: 0.192...Test Accuracy: 0.191
2020-11-14 20:46:22,028 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 11/25...Training Loss: 1.741...Validation Loss: 1.740...Train Accuracy: 0.196...Test Accuracy: 0.203
2020-11-14 20:46:26,288 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 12/25...Training Loss: 1.731...Validation Loss: 1.741...Train Accuracy: 0.195...Test Accuracy: 0.195
2020-11-14 20:46:30,495 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 13/25...Training Loss: 1.725...Validation Loss: 1.733...Train Accuracy: 0.212...Test Accuracy: 0.214
2020-11-14 20:46:34,706 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 14/25...Training Loss: 1.731...Validation Loss: 1.729...Train Accuracy: 0.201...Test Accuracy: 0.197
2020-11-14 20:46:38,751 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 15/25...Training Loss: 1.728...Validation Loss: 1.725...Train Accuracy: 0.203...Test Accuracy: 0.209
2020-11-14 20:46:42,717 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 16/25...Training Loss: 1.731...Validation Loss: 1.729...Train Accuracy: 0.198...Test Accuracy: 0.235
2020-11-14 20:46:46,654 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 17/25...Training Loss: 1.730...Validation Loss: 1.734...Train Accuracy: 0.201...Test Accuracy: 0.207
2020-11-14 20:46:50,871 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 18/25...Training Loss: 1.732...Validation Loss: 1.736...Train Accuracy: 0.197...Test Accuracy: 0.196
2020-11-14 20:46:54,966 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 19/25...Training Loss: 1.738...Validation Loss: 1.715...Train Accuracy: 0.195...Test Accuracy: 0.214
2020-11-14 20:46:58,946 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 20/25...Training Loss: 1.736...Validation Loss: 1.735...Train Accuracy: 0.201...Test Accuracy: 0.195
2020-11-14 20:47:02,932 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 21/25...Training Loss: 1.724...Validation Loss: 1.714...Train Accuracy: 0.204...Test Accuracy: 0.220
2020-11-14 20:47:07,129 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 22/25...Training Loss: 1.723...Validation Loss: 1.737...Train Accuracy: 0.199...Test Accuracy: 0.194
2020-11-14 20:47:11,508 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 23/25...Training Loss: 1.733...Validation Loss: 1.747...Train Accuracy: 0.196...Test Accuracy: 0.188
2020-11-14 20:47:15,721 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 24/25...Training Loss: 1.737...Validation Loss: 1.748...Train Accuracy: 0.203...Test Accuracy: 0.196
2020-11-14 20:47:19,874 — audio_model.audio_model.model_manager — INFO —train:186 — Epoch: 25/25...Training Loss: 1.734...Validation Loss: 1.720...Train Accuracy: 0.197...Test Accuracy: 0.206