I am trying to do an Isolated Speech Digits with LCNN (Light Convolutional Neural Network), right now I am stuck on this error whenever I run this code
P.S. I am doing this on Jupyter Notebook
optimizer = torch.optim.Adam(model.parammeters(), lr=0.07)
# Specify the loss criteria
loss_criteria = nn.CrossEntropyLoss()
# Track metrics in these arrays
epoch_nums = []
training_loss = []
validation_loss = []
# Train over 10 epochs (We restrict to 10 for time issues)
epochs = 10
print('Training on', device)
for epoch in range(1, epochs + 1):
train_loss = train(model, device, train_loader, optimizer, epoch)
test_loss = test(model, device, test_loader)
epoch_nums.append(epoch)
training_loss.append(train_loss)
validation_loss.append(test_loss)
The Whole code
import matplotlib.pyplot as plt
from matplotlib.backend_bases import RendererBase
from scipy import signal
from scipy.io import wavfile
#import soundfile as sf
import os
import numpy as np
from PIL import Image
from scipy.fftpack import fft
from torch.optim import Adam, SGD
from torch.nn import Linear, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
%matplotlib inline
audio_path = 'Dataset/train/'
pict_Path = 'Dataset/pics/train/'
test_audio_path = 'Dataset/test/'
test_pict_Path = 'Dataset/pics/test/'
val_audio_path = 'Dataset/validation/'
val_pict_Path = 'Dataset/pics/validation/'
samples = []
subFolderList = []
for x in os.listdir(audio_path):
if os.path.isdir(audio_path + x):
subFolderList.append(x)
subFolderList_test = []
for y in os.listdir(audio_path):
if os.path.isdir(audio_path + y):
subFolderList_test.append(y)
subFolderList_val = []
for k in os.listdir(val_audio_path):
if os.path.isdir(val_audio_path + k):
subFolderList_val.append(k)
if not os.path.exists(pict_Path):
os.makedirs(pict_Path)
if not os.path.exists(test_pict_Path):
os.makedirs(test_pict_Path)
if not os.path.exists(val_pict_Path):
os.makedirs(val_pict_Path)
subFolderList = []
for x in os.listdir(audio_path):
if os.path.isdir(audio_path + x):
subFolderList.append(x)
if not os.path.exists(pict_Path + x):
os.makedirs(pict_Path + x)
subFolderList_test = []
for y in os.listdir(audio_path):
if os.path.isdir(audio_path + y):
subFolderList_test.append(y)
if not os.path.exists(test_pict_Path + y):
os.makedirs(test_pict_Path + y)
subFolderList_val = []
for k in os.listdir(val_audio_path):
if os.path.isdir(val_audio_path + k):
subFolderList_val.append(k)
if not os.path.exists(val_pict_Path + k):
os.makedirs(val_pict_Path + k)
sample_audio = []
total = 0
for x in subFolderList:
# get all the wave files
all_files = [y for y in os.listdir(audio_path + x) if '.wav' in y]
total += len(all_files)
# collect the first file from each dir
sample_audio.append(audio_path + x + '/'+ all_files[0])
# show file counts
print('count: %d : %s' % (len(all_files), x ))
print(total)
sample_audio = []
total = 0
for x in subFolderList_test:
# get all the wave files
all_files = [y for y in os.listdir(test_audio_path + x) if '.wav' in y]
total += len(all_files)
# collect the first file from each dir
sample_audio.append(test_audio_path + x + '/'+ all_files[0])
# show file counts
print('count: %d : %s' % (len(all_files), x ))
print(total)
sample_audio = []
total = 0
for x in subFolderList_val:
# get all the wave files
all_files = [y for y in os.listdir(val_audio_path + x) if '.wav' in y]
total += len(all_files)
# collect the first file from each dir
sample_audio.append(val_audio_path + x + '/'+ all_files[0])
# show file counts
print('count: %d : %s' % (len(all_files), x ))
print(total)
def log_specgram(audio, sample_rate, window_size=20,
step_size=10, eps=1e-10):
nperseg = int(round(window_size * sample_rate / 1e3))
noverlap = int(round(step_size * sample_rate / 1e3))
freqs, _, spec = signal.spectrogram(audio,
fs=sample_rate,
window='hann',
nperseg=nperseg,
noverlap=noverlap,
detrend=False)
return freqs, np.log(spec.T.astype(np.float32) + eps)
def wav2img(wav_path, targetdir='', figsize=(4,4)):
"""
takes in wave file path
and the fig size. Default 4,4 will make images 288 x 288
"""
fig = plt.figure(figsize=figsize)
# use soundfile library to read in the wave files
samplerate, test_sound = wavfile.read(wav_path)
_, spectrogram = log_specgram(test_sound, samplerate)
## create output path
output_file = wav_path.split('/')[-1].split('.wav')[0]
output_file = targetdir +'/'+ output_file
plt.imshow(spectrogram.T, aspect='auto', origin='lower')
plt.imsave('%s.png' % output_file, spectrogram)
plt.close()
for i, x in enumerate(subFolderList):
print(i, ':', x)
# get all the wave files
all_files = [y for y in os.listdir(audio_path + x) if '.wav' in y]
for file in all_files[:]:
wav2img(audio_path + x + '/' + file, pict_Path + x)
print("Done!")
for i, x in enumerate(subFolderList_test):
print(i, ':', x)
# get all the wave files
all_files = [y for y in os.listdir(test_audio_path + x) if '.wav' in y]
for file in all_files[:]:
wav2img(test_audio_path + x + '/' + file, test_pict_Path + x)
print("Done!")
for i, x in enumerate(subFolderList_val):
print(i, ':', x)
# get all the wave files
all_files = [y for y in os.listdir(val_audio_path + x) if '.wav' in y]
for file in all_files[:]:
wav2img(val_audio_path + x + '/' + file, val_pict_Path + x)
print("Done!")
class mfm(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, type=1):
super(mfm, self).__init__()
self.out_channels = out_channels
if type == 1:
self.filter = nn.Conv2d(in_channels, 2*out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
else:
self.filter = nn.Linear(in_channels, 2*out_channels)
def forward(self, x):
x = self.filter(x)
out = torch.split(x, self.out_channels, 1)
return torch.max(out[0], out[1])
class group(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
super(group, self).__init__()
self.conv_a = mfm(in_channels, in_channels, 1, 1, 0)
self.conv = mfm(in_channels, out_channels, kernel_size, stride, padding)
def forward(self, x):
x = self.conv_a(x)
x = self.conv(x)
return x
class resblock(nn.Module):
def __init__(self, in_channels, out_channels):
super(resblock, self).__init__()
self.conv1 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.conv2 = mfm(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
def forward(self, x):
res = x
out = self.conv1(x)
out = self.conv2(out)
out = out + res
return out
class network_9layers(nn.Module):
def __init__(self, num_classes=79077):
super(network_9layers, self).__init__()
self.features = nn.Sequential(
mfm(1, 48, 5, 1, 2),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
group(48, 96, 3, 1, 1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
group(96, 192, 3, 1, 1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
group(192, 128, 3, 1, 1),
group(128, 128, 3, 1, 1),
nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
)
self.fc1 = mfm(8*8*128, 256, type=0)
self.fc2 = nn.Linear(256, num_classes)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = F.dropout(x, training=self.training)
out = self.fc2(x)
return out, x
def LightCNN_9Layers(**kwargs):
model = network_9layers(**kwargs)
return model
model = LightCNN_9Layers(num_classes=79077)
def load_dataset(data_path):
import torch
import torchvision
import torchvision.transforms as transforms
# Load all the images
transformation = transforms.Compose([
# Randomly augment the image data
# Random horizontal flip
transforms.RandomHorizontalFlip(0.5),
# Random vertical flip
transforms.RandomVerticalFlip(0.3),
# transform to tensors
transforms.ToTensor(),
# Normalize the pixel values (in R, G, and B channels)
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
# Resize Images
transforms.Resize((128,128))
])
# Load all of the images, transforming them
full_dataset = torchvision.datasets.ImageFolder(
root='Dataset/pics/train',
transform=transformation
)
full_dataset_test = torchvision.datasets.ImageFolder(
root='Dataset/pics/test',
transform=transformation
)
# Split into training (70% and testing (30%) datasets)
train_dataset = full_dataset
test_dataset = full_dataset_test
# use torch.utils.data.random_split for training/test split
#train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
# define a loader for the training data we can iterate through in 50-image batches
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=50,
num_workers=0,
shuffle=False
)
# define a loader for the testing data we can iterate through in 50-image batches
test_loader = torch.utils.data.DataLoader(
test_dataset,
batch_size=50,
num_workers=0,
shuffle=False
)
return train_loader, test_loader
# Recall that we have resized the images and saved them into
train_folder = 'Dataset/pics/train'
# Get the iterative dataloaders for test and training data
train_loader, test_loader = load_dataset(train_folder)
batch_size = train_loader.batch_size
print("Data loaders ready to read", train_folder)
def train(model, device, train_loader, optimizer, epoch):
# Set the model to training mode
model.train()
train_loss = 0
print("Epoch:", epoch)
# Process the images in batches
for batch_idx, (data, target) in enumerate(train_loader):
# Use the CPU or GPU as appropriate
# Recall that GPU is optimized for the operations we are dealing with
data, target = data.to(device), target.to(device)
# Reset the optimizer
optimizer.zero_grad()
# Push the data forward through the model layers
output = model(data)
# Get the loss
loss = loss_criteria(output, target)
# Keep a running total
train_loss += loss.item()
# Backpropagate
loss.backward()
optimizer.step()
# Print metrics so we see some progress
print('\tTraining batch {} Loss: {:.6f}'.format(batch_idx + 1, loss.item()))
# return average loss for the epoch
avg_loss = train_loss / (batch_idx+1)
print('Training set: Average loss: {:.6f}'.format(avg_loss))
return avg_loss
def test(model, device, test_loader):
# Switch the model to evaluation mode (so we don't backpropagate or drop)
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
batch_count = 0
for data, target in test_loader:
batch_count += 1
data, target = data.to(device), target.to(device)
# Get the predicted classes for this batch
output = model(data)
# Calculate the loss for this batch
test_loss += loss_criteria(output, target).item()
# Calculate the accuracy for this batch
_, predicted = torch.max(output.data, 1)
correct += torch.sum(target==predicted).item()
# Calculate the average loss and total accuracy for this epoch
avg_loss = test_loss / batch_count
print('Validation set: Average loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
avg_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
# return average loss for the epoch
return avg_loss
# The images are in a folder named 'input/natural-images/natural_images'
training_folder_name = 'Dataset/pics/train'
# All images are 128x128 pixels
img_size = (128,128)
# The folder contains a subfolder for each class of shape
classes = sorted(os.listdir(training_folder_name))
print(classes)
device = "cpu"
if (torch.cuda.is_available()):
# if GPU available, use cuda (on a cpu, training will take a considerable length of time!)
device = "cuda"
# Create an instance of the model class and allocate it to the device
model = LightCNN_9Layers(num_classes=len(classes)).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.07)
# Specify the loss criteria
loss_criteria = nn.CrossEntropyLoss()
# Track metrics in these arrays
epoch_nums = []
training_loss = []
validation_loss = []
# Train over 10 epochs (We restrict to 10 for time issues)
epochs = 10
print('Training on', device)
for epoch in range(1, epochs + 1):
train_loss = train(model, device, train_loader, optimizer, epoch)
test_loss = test(model, device, test_loader)
epoch_nums.append(epoch)
training_loss.append(train_loss)
validation_loss.append(test_loss)