Hi there. I built a CNN that I am training on data I generated. The CNN uses 1D convolutions, since each data sample is a row of numbers rather than a picture. I am running into an issue where the model only works if the batch size is 1. I’ve seen other posts on this forum (like this one: BatchNorm1d - input shape) talk about resizing or permuting the data into the proper shape, but I’ve spent a month trying to figure this out on my own and nothing has worked. When I run the model with any batch size other than 1, I get the following error:
RuntimeError: expand(torch.cuda.FloatTensor{[100, 3]}, size=[3]): the number of sizes provided (1) must be greater or equal to the number of dimensions in the tensor (2)
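For reference, here is my understanding of the input shape Conv1d expects, as a toy sketch with made-up sizes (100 samples of length 16200, not my actual code), which is why I suspect my batches are shaped wrong:

import torch
import torch.nn as nn

conv = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3)
x = torch.randn(100, 16200)  # a batch of 100 flat samples, as a DataLoader would yield them
x = x.unsqueeze(1)           # -> [100, 1, 16200], i.e. (N, C_in, L)
print(conv(x).shape)         # -> torch.Size([100, 64, 16198])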
Please see the model below.
Any help in this matter would be greatly appreciated. Thank you!
import time
from numpy import savetxt
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import csv
from numpy import genfromtxt
#%%
start = time.time() # Begin Timer
# Device configuration
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
ngrid = 16200
num_epochs = 500
batch_size = 1
learning_rate = 0.0003  # originally 0.001
#momentum = 0.5
neg_slope = 0.4  # negative slope for LeakyReLU
alpha = 1  # ELU constant
# Store the data in NumPy arrays
with open('current working data/train_data.xlsx', 'r') as f1:
#with open('train_data.xlsx', 'r') as f1:
    reader = csv.reader(f1)  # store the file contents in 'reader'
    training_data_np_list = list(reader)  # convert 'reader' to a list
    training_data_np = np.array(training_data_np_list, dtype=np.float32)  # convert the list into an np array
    training_data_np = training_data_np[:, 0:16200]  # only keep ngrid data (drop the labels)
with open('current working data/train_data_labels.xlsx', 'r') as f2:
#with open('train_data_labels.xlsx', 'r') as f2:
    reader = csv.reader(f2)
    training_data_labels_list = list(reader)
    training_data_labels_np = np.array(training_data_labels_list, dtype=np.float32)
with open('current working data/test_data.xlsx', 'r') as f3:
#with open('test_data.xlsx', 'r') as f3:
    reader = csv.reader(f3)
    test_data_np_list = list(reader)
    test_data_np = np.array(test_data_np_list, dtype=np.float32)
    test_data_np = test_data_np[:, 0:16200]  # only keep ngrid data
with open('current working data/test_data_labels.xlsx', 'r') as f4:
#with open('test_data_labels.xlsx', 'r') as f4:
    reader = csv.reader(f4)
    test_data_labels_list = list(reader)
    test_data_labels_np = np.array(test_data_labels_list, dtype=np.float32)
# Convert the NumPy data into PyTorch tensors - the data includes the labels
#training_data_pth = torch.from_numpy(training_data_np, requires_grad=True)
#training_data_labels_pth = torch.from_numpy(training_data_labels_np, requires_grad=True)
#test_data_pth = torch.from_numpy(test_data_np, requires_grad=True)
#test_data_labels_pth = torch.from_numpy(test_data_labels_np, requires_grad=True)
# Convert the NumPy data into PyTorch tensors - original 4.21.24
training_data_pth = torch.from_numpy(training_data_np)
training_data_labels_pth = torch.from_numpy(training_data_labels_np)
test_data_pth = torch.from_numpy(test_data_np)
test_data_labels_pth = torch.from_numpy(test_data_labels_np)
print('Training Data Type:', training_data_pth.dtype)
print('Training Data Labels Type:', training_data_labels_pth.dtype)
print('Test Data Type:', test_data_pth.dtype)
print('Test Data Labels Type:', test_data_labels_pth.dtype)
# Turn the tensors into datasets
training_data = torch.utils.data.TensorDataset(training_data_pth, training_data_labels_pth)
test_data = torch.utils.data.TensorDataset(test_data_pth, test_data_labels_pth)
# Wrap the datasets with data loaders - this makes them iterable
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
# One batch of training data samples
dataiter = iter(train_loader)
MUSIC_data, labels = next(dataiter)
print('Size of MUSIC_data', MUSIC_data.size())
print('Size of labels', labels.size())
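# (Since the dataset holds flat rows of length 16200, this prints [batch_size, 16200]:
# the batch dimension lands where Conv1d expects the channel dimension, which I
# suspect is related to the error above.)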
# CNN Model
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # First attempt - input sizes not correct because they should be 3D unbatched
        # Should there be 3 input channels? One for azimuth, one for colat, one for the MUSIC output? Also consider channels as the number of freq bins
        #self.bn1 = nn.BatchNorm1d(1)  # BatchNorm1d takes the number of features (channels) as input
        self.conv1 = nn.Conv1d(batch_size, 64, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn1 = nn.BatchNorm1d(batch_size)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)  # output size = [(W - K + 2P)/S] + 1, W = input length, K = kernel size, P = padding, S = stride
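        # (Worked example of the formula above for conv1, assuming a raw input length W = 16200:
        # (16200 - 3 + 2*0)/1 + 1 = 16198, so each k=3/s=1/p=0 conv trims 2 off the length.)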
        self.bn2 = nn.BatchNorm1d(batch_size)
        self.pool1 = nn.MaxPool1d(2, 2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn3 = nn.BatchNorm1d(batch_size)
        self.conv4 = nn.Conv1d(128, 128, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn4 = nn.BatchNorm1d(batch_size)
        self.pool2 = nn.MaxPool1d(2, 2)
        self.conv5 = nn.Conv1d(128, 256, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn5 = nn.BatchNorm1d(batch_size)
        self.conv6 = nn.Conv1d(256, 256, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn6 = nn.BatchNorm1d(batch_size)
        self.conv7 = nn.Conv1d(256, 256, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn7 = nn.BatchNorm1d(batch_size)
        self.pool3 = nn.MaxPool1d(3, 3)
        self.conv8 = nn.Conv1d(256, 512, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn8 = nn.BatchNorm1d(batch_size)
        self.conv9 = nn.Conv1d(512, 512, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn9 = nn.BatchNorm1d(batch_size)
        self.conv10 = nn.Conv1d(512, 512, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn10 = nn.BatchNorm1d(batch_size)
        self.pool4 = nn.MaxPool1d(3, 3)
        self.conv11 = nn.Conv1d(512, 512, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn11 = nn.BatchNorm1d(batch_size)
        self.conv12 = nn.Conv1d(512, 512, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn12 = nn.BatchNorm1d(batch_size)
        self.conv13 = nn.Conv1d(512, 1, kernel_size=3, stride=1, dilation=1, groups=1, bias=True)
        self.bn13 = nn.BatchNorm1d(batch_size)
        self.pool5 = nn.MaxPool1d(3, 3)
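        # (Length bookkeeping for the fc1 input size, assuming an input length of 16200:
        # each k=3/s=1/p=0 conv subtracts 2, each pool floor-divides by its stride:
        # 16200 -> 16198 -> 16196 -> 8098 -> 8096 -> 8094 -> 4047 -> 4045 -> 4043 -> 4041
        # -> 1347 -> 1345 -> 1343 -> 1341 -> 447 -> 445 -> 443 -> 441 -> 147,
        # which is where the 147 below comes from.)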
        self.fc1 = nn.Linear(147, 147, bias=False)
        self.bn14 = nn.BatchNorm1d(147)
        self.fc2 = nn.Linear(147, 147, bias=False)  # originally 1000, 225
        self.bn15 = nn.BatchNorm1d(147)
        self.fc3 = nn.Linear(147, 3)  # originally 225, 3
    def forward(self, x):
        # This was an attempt to reshape the input for use with batch sizes other than 1
        # Transpose the data to the correct shape -
        # current shape = [embedded dim, batch size, length] = [1, 50, 16207]
        # new shape = [batch size, embedded dim, length] = [50, 1, 16207]
        # swap the 2nd and 3rd dims
        #print('Size of x before transpose', x.size())
        #x = torch.transpose(x, 1, 2)
        #print('Size of x before resizing into CNN', x.size())
        #x_temp = torch.empty((1, batch_size, x.size(dim=1)))  # empty tensor
        #x_temp[0, :, :] = x  # assign the input 'x' values to the empty tensor
        ##x = torch.transpose(x_temp, 0, 1)  # transpose 'x_temp' and assign it to 'x'
        #x = x_temp  # assign x_temp to the input 'x'
        #x = x.to(device)  # push the data to the device
        # BLOCK 1
        #x = self.bn1(x)
        #print('x dtype after batchnorm:', x.dtype)
        #print('Size of x after batchnorm', x.size())
        # Define the LeakyReLU activation, then apply it after each layer below
        m = nn.LeakyReLU(neg_slope)
        x = self.conv1(x)
        x = m(x)  # apply LeakyReLU
        #x = torch.permute(x, (1, 0))
        #x = torch.permute(x, (0, 1))
        x = self.conv2(x)
        x = m(x)
        x = self.pool1(x)
        # BLOCK 2
        x = self.conv3(x)
        x = m(x)
        x = self.conv4(x)
        x = m(x)
        x = self.pool2(x)
        # BLOCK 3
        x = self.conv5(x)
        x = m(x)
        x = self.conv6(x)
        x = m(x)
        x = self.conv7(x)
        x = m(x)
        x = self.pool3(x)
        # BLOCK 4
        x = self.conv8(x)
        x = m(x)
        x = self.conv9(x)
        x = m(x)
        x = self.conv10(x)
        x = m(x)
        x = self.pool4(x)
        # BLOCK 5
        x = self.conv11(x)
        x = m(x)
        x = self.conv12(x)
        x = m(x)
        x = self.conv13(x)
        x = m(x)
        x = self.pool5(x)
        # BLOCK 6
        x = self.fc1(x)
        x = m(x)
        x = self.fc2(x)
        x = m(x)
        x = self.fc3(x)
        return x
model = ConvNet().to(device)  # send the model to the GPU (if available)
#model.cuda()  # redundant after .to(device), and breaks on CPU-only machines
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
training_loss_np_array = np.zeros([1, num_epochs])
loss_np_array_index = 0
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    n_train_samples = len(train_loader)  # number of batches (equals the number of samples when batch_size = 1)
    training_comparison_data = torch.zeros([n_train_samples, 6])  # empty tensor for comparing samples
    training_index = 0  # iterable index variable
    running_loss = 0.0
    for i, (MUSIC_data, labels) in enumerate(train_loader):
        MUSIC_data = MUSIC_data.to(device)  # send to GPU
        labels = labels.to(device)  # send to GPU
        # Forward pass
        outputs = model(MUSIC_data)
        #loss = criterion(outputs, labels)  # use this for non-augmented losses
        loss = torch.sqrt(criterion(outputs, labels))  # sqrt of MSELoss, i.e. RMSE over the 3 output components
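        # (Note: MSELoss averages the squared errors, so for a [*, 3] output sqrt(MSE)
        # is the per-sample Euclidean distance divided by sqrt(3). The exact mean
        # distance would be something like
        #     torch.linalg.norm(outputs - labels, dim=-1).mean()
        # a sketch, assuming outputs and labels are both shaped [batch, 3].)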
        # Output comparison: predicted vs. actual sound-source position
        training_predicted = outputs
        training_comparison_data[training_index, :3] = training_predicted
        training_comparison_data[training_index, 3:6] = labels
        training_index += 1
        # Backward and optimize
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        running_loss += loss.item()
        #training_loss_np_array[0:loss_np_array_index] = loss.detach().numpy()
        #loss_np_array_index += 1  # index to the next column in the training loss array
    print(f'[{epoch + 1}] loss: {running_loss:.3f}')
    training_comparison_data_np = training_comparison_data.detach().numpy()  # convert to a numpy array
print('Finished Training')
#PATH = './VGG16_rev_DOE3_Trial3.pth'
#torch.save(model.state_dict(), PATH)
end = time.time()  # end timer
print('Total Time to Execute Training [seconds]')
print(end - start)
print('Total Time to Execute Training [minutes]')
print((end - start)/60)
print('Total Time to Execute Training [hours]')
print((end - start)/3600)
#%%
# Evaluation Section
dataiter = iter(test_loader)
MUSIC_data, labels = next(dataiter)
testing_running_loss = 0.0
start = time.time()  # begin timer
with torch.no_grad():
    n_samples = len(test_loader)
    comparison_data = torch.zeros([n_samples, 6])  # empty tensor for comparing samples
    index = 0  # iterable index variable
    for MUSIC_data, labels in test_loader:
        MUSIC_data = MUSIC_data.to(device)
        labels = labels.to(device)
        outputs = model(MUSIC_data)
        testing_loss = torch.sqrt(criterion(outputs, labels))  # sqrt of MSELoss (RMSE), same as in training
        # Output comparison: predicted vs. actual sound-source position
        testing_predicted = outputs
        #distance = torch.sqrt((outputs[0] - labels[0])**2 + (outputs[1] - labels[1])**2 + (outputs[2] - labels[2])**2)
        comparison_data[index, :3] = testing_predicted
        comparison_data[index, 3:6] = labels
        #comparison_data[index, 6] = distance
        index += 1
        testing_running_loss += testing_loss.item()
    print(f'Testing loss: {testing_running_loss:.3f}')
# Convert the data to numpy
comparison_data_np_array = comparison_data.numpy()  # convert the tensor to an np array
#savetxt('predicted_SSL_vs_real_SSL_CNN3_ngrid_16200_LR0.0005_LeakyRelu0.4_epochs150_5.15.24.csv', comparison_data_np_array, delimiter=',', header='Predicted XYZ, Real XYZ, Euclidean Distance')
#savetxt('DOE_Trial_16_Test_data_Loss_CNN3_ngrid_16200_LR0.0005_LeakyRelu0.4_epochs150_5.15.24.csv', comparison_data_np_array, delimiter=',', header='Predicted XYZ, Real XYZ, Euclidean Distance')
end = time.time()  # end timer
print('Total Time to Execute Testing [seconds]')
print(end - start)
print('Total Time to Execute Testing [minutes]')
print((end - start)/60)
print('Total Time to Execute Testing [hours]')
print((end - start)/3600)
#acc = 100.0 * n_correct2 / n_samples
#print(f'Accuracy of the loaded model: {acc} %')