Im trying to train a model to know whats background noise and whats not and I keep getting this error. Ive truncated it so it should be the same size but its not, any help would be appreciated! Im new to this so im not sure why the post is popping up weird
import torch
import torchaudio
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
Load background noise and speech audio files
background_files = [“C:\Users\iaddi\Downloads\crowd_talking-6762.mp3”]
speech_files = [“C:\Users\iaddi\Downloads\Cam_1.mp3”]
Audio preprocessing
def pad_truncate(audio, max_length=16000):
print(f"Before: {len(audio)}")
if len(audio) > max_length:
audio = audio[:max_length]
elif len(audio) < max_length:
audio = F.pad(audio, (0, max_length - len(audio)))
return audio
Create dataset
class AudioDataset(Dataset):
def init(self, background_files, speech_files):
self.background_files = background_files
self.speech_files = speech_files
def __getitem__(self, index):
if index < len(self.background_files):
audio, sample_rate = torchaudio.load(self.background_files[index])
audio = audio.squeeze(0)
audio = pad_truncate(audio)
print(len(audio))
label = 0
else:
audio, sample_rate = torchaudio.load(self.speech_files[index-len(self.background_files)])
label = 1
return audio, label
def __len__(self):
return len(self.background_files) + len(self.speech_files)
Create model
class AudioClassifier(nn.Module):
def init(self):
super().init()
self.conv1 = nn.Conv2d(1, 16, 3, stride=2, padding=1)
self.conv2 = nn.Conv2d(16, 32, 3, stride=2, padding=1)
self.conv3 = nn.Conv2d(32, 64, 3, stride=2, padding=1)
self.classifier = nn.Linear(64, 1)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = self.conv3(x)
x = F.relu(x)
x = torch.mean(x, dim=2)
x = self.classifier(x)
return x
Train model
dataset = AudioDataset(background_files, speech_files)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
model = AudioClassifier()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters())
num_epochs = 10
for epoch in range(num_epochs):
for i, (inputs, labels) in enumerate(dataloader):
# Forward pass and loss
outputs = model(inputs)
loss = criterion(outputs, labels.unsqueeze(1).float())
# Backward pass and update
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
print(‘Finished training’)