I’m trying to make a CNN audio classification model
class AudioClassification(nn.Module):
    """M5-style 1-D CNN for audio classification.

    Input must be 3-D: ``(batch, input_count, time)`` — ``nn.Conv1d`` always
    expects a channel axis.  A raw 2-D ``(batch, time)`` waveform batch has to
    be lifted to ``(batch, 1, time)`` by the caller (``x.unsqueeze(1)``) with
    ``input_count=1``; passing it as-is produces the
    "Expected 3-dimensional input ... got 2-dimensional" RuntimeError.

    Args:
        input_count: number of input channels (1 for mono waveforms).
        output_count: number of target classes.
        channel_count: width of the conv layers.
        stride: stride of the first (large-kernel) conv.

    Returns (forward): log-probabilities of shape ``(batch, 1, output_count)``.
    """

    def __init__(self, input_count, output_count, channel_count=32, stride=16):
        super().__init__()
        self.conv1 = nn.Conv1d(input_count, channel_count, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(channel_count)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(channel_count, channel_count, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(channel_count)
        self.pool2 = nn.MaxPool1d(4)
        # conv2 emits `channel_count` channels, so the classifier head must
        # take `channel_count` features (the original `2 * channel_count`
        # never matched any tensor produced by forward()).
        self.fc1 = nn.Linear(channel_count, output_count)

    def forward(self, x):
        # x: (batch, input_count, time)
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        # Collapse the (variable-length) time axis before the linear layer:
        # global average pool -> (batch, channel_count, 1), then move the
        # feature axis last so fc1 applies per example.
        x = F.avg_pool1d(x, x.shape[-1])
        x = x.permute(0, 2, 1)            # (batch, 1, channel_count)
        x = self.fc1(x)                   # (batch, 1, output_count)
        return F.log_softmax(x, dim=2)
def train(model, epoch, log_interval):
    """Run one training epoch over the global `train_loader`.

    Relies on module-level `train_loader`, `transform`, `optimizer`,
    `pbar`, `pbar_update`, and `losses` being defined by the caller.

    Args:
        model: the network to train (switched to train mode here).
        epoch: epoch number, used only for logging.
        log_interval: print a progress line every `log_interval` batches.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = transform(data)
        output = model(data)
        # NOTE: the original `loss = F = F.nll_loss(...)` rebound the module
        # alias F to a tensor, crashing on the second batch.  Also, the model
        # outputs (batch, 1, n_classes); squeeze the singleton dim for nll_loss.
        loss = F.nll_loss(output.squeeze(), target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            # Loop variable is batch_idx (the original printed an undefined
            # `batch_idx` while looping over `batch_count`).
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
                  f"({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")
        pbar.update(pbar_update)
        losses.append(loss.item())
and I’m running into the following error:
RuntimeError: Expected 3-dimensional input for 3-dimensional weight [32, 22050, 80], but got 2-dimensional input of size [32, 22050] instead
I understand it's related to the expected input shape (the conv layer wants a 3-D batch × channels × time tensor), but I'm not sure how to change my data to include the channel dimension.