Hi there. I built a CNN that I am training on data I generated. The CNN uses 1d convolutions since each data sample is a row of numbers rather than a picture. I am running into an issue where the model only works if the batch size is 1. I’ve seen other posts (like this one: BatchNorm1d - input shape) in this forum talk about resizing or permuting data so it will be the proper shape, but I’ve spent a month trying to figure out this issue on my own and nothing seems to work. When I run the model with a batch size that is not 1, I get the following error:

RuntimeError: expand(torch.cuda.FloatTensor{[100, 3]}, size=[3]): the number of sizes provided (1) must be greater or equal to the number of dimensions in the tensor (2)

Please see the model below.

Any help in this matter would be greatly appreciated. Thank you!

import time

from numpy import savetxt

import torch

from torch.utils.data import DataLoader

import torch.nn as nn

import torch.nn.functional as F

import torchvision

import torchvision.transforms as transforms

import matplotlib.pyplot as plt

import numpy as np

import csv

from numpy import genfromtxt

#%%

start = time.time() # Begin Timer

# Device configuration

#device = torch.device(‘cuda’ if torch.cuda.is_available() else ‘cpu’)

device = torch.device(‘cuda:0’ if torch.cuda.is_available() else ‘cpu’)

# Hyperparameters

ngrid = 16200;

num_epochs = 500;

batch_size = 1

learning_rate = 0.0003 # originally 0.001

#momentum = 0.5;

neg_slope = 0.4 ; # Neg slope for a Leaky Relu

alpha = 1; # ELU constant

# Store the data in a Numpy variable

with open(‘current working data/train_data.xlsx’, ‘r’) as f1:

#with open(‘train_data.xlsx’, ‘r’) as f1:

```
reader = csv.reader(f1) # store excel file in variable 'reader'
training_data_np_list = list(reader) # convert 'reader' to a list
training_data_np = np.array(training_data_np_list, dtype = np.float32) # convert list into a np array
training_data_np = training_data_np[:, 0:16200]; # only keep N_grid data (get rid of labels)
```

with open(‘current working data/train_data_labels.xlsx’, ‘r’) as f2:

#with open(‘train_data_labels.xlsx’, ‘r’) as f2:

```
reader = csv.reader(f2)
training_data_labels_list = list(reader)
training_data_labels_np = np.array(training_data_labels_list, dtype = np.float32)
```

with open(‘current working data/test_data.xlsx’, ‘r’) as f3:

#with open(‘test_data.xlsx’, ‘r’) as f3:

```
reader = csv.reader(f3)
test_data_np_list = list(reader)
test_data_np = np.array(test_data_np_list, dtype = np.float32)
test_data_np = test_data_np[:, 0:16200]; # only keep n_grid data
```

with open(‘current working data/test_data_labels.xlsx’, ‘r’) as f4:

#with open(‘test_data_labels.xlsx’, ‘r’) as f4:

```
reader = csv.reader(f4)
test_data_labels_list = list(reader)
test_data_labels_np = np.array(test_data_labels_list, dtype = np.float32)
```

# Convert Numpy data into Pytorch Tensors - The data includes the labels

#training_data_pth = torch.from_numpy(training_data_np, requires_grad=True);

#training_data_labels_pth = torch.from_numpy(training_data_labels_np, requires_grad=True);

#test_data_pth = torch.from_numpy(test_data_np, requires_grad=True);

#test_data_labels_pth = torch.from_numpy(test_data_labels_np, requires_grad=True);

# Convert Numpy data into Pytorch Tensor - Original 4.21.24

training_data_pth = torch.from_numpy(training_data_np);

training_data_labels_pth = torch.from_numpy(training_data_labels_np);

test_data_pth = torch.from_numpy(test_data_np);

test_data_labels_pth = torch.from_numpy(test_data_labels_np);

print(‘Training Data Type:’, training_data_pth.dtype)

print(‘Training Data Labels Type:’, training_data_labels_pth.dtype)

print(‘Test Data Type:’, test_data_pth.dtype)

print(‘Test Data Labels Type:’, test_data_labels_pth.dtype)

# Transform the tensor into a dataset

training_data = torch.utils.data.TensorDataset(training_data_pth, training_data_labels_pth);

test_data = torch.utils.data.TensorDataset(test_data_pth, test_data_labels_pth);

# Wrap the dataset with a data loader - this makes it an iterable object

train_loader = DataLoader(training_data, batch_size = batch_size, shuffle = False)

test_loader = DataLoader(test_data, batch_size = batch_size, shuffle = False)

# one batch of random training data samples

#data_iter = iter(train_loader)

#label_iter = iter(train_loader_labels)

#MUSIC_data = data_iter.next()

#labels = label_iter.next()

# one batch of random training data samples

dataiter = iter(train_loader)

MUSIC_data, labels = next(dataiter)

print(‘Size of MUSIC_data’, MUSIC_data.size())

print(‘Size of labels’, labels.size())

# CNN Model

class ConvNet(nn.Module):

def **init**(self):

super().**init**()

```
# First attempt - input sizes not correct bc they should be 3D unbatched
# Should there be 3 input channels? One for azimuth, one for colat, one for MUSIC output? Also consider channels as number of freq bins
#self.bn1 = nn.BatchNorm1d(1); # input to batch size is number of features (channels)
self.conv1 = nn.Conv1d( batch_size , 64 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True)
self.bn1 = nn.BatchNorm1d(batch_size);
self.conv2 = nn.Conv1d( 64 , 64 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn2 = nn.BatchNorm1d(batch_size);
self.pool1 = nn.MaxPool1d(2, 2)
self.conv3 = nn.Conv1d( 64 , 128 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output is 1 x 900
self.bn3 = nn.BatchNorm1d(batch_size);
self.conv4 = nn.Conv1d( 128 , 128 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn4 = nn.BatchNorm1d(batch_size);
self.pool2 = nn.MaxPool1d(2, 2)
self.conv5 = nn.Conv1d( 128 , 256 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output is 1 x 900
self.bn5 = nn.BatchNorm1d(batch_size);
self.conv6 = nn.Conv1d( 256 , 256 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn6 = nn.BatchNorm1d(batch_size);
self.conv7 = nn.Conv1d( 256 , 256 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn7 = nn.BatchNorm1d(batch_size);
self.pool3 = nn.MaxPool1d(3, 3)
self.conv8 = nn.Conv1d( 256 , 512 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output is 1 x 900
self.bn8 = nn.BatchNorm1d(batch_size);
self.conv9 = nn.Conv1d( 512 , 512 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn9 = nn.BatchNorm1d(batch_size);
self.conv10 = nn.Conv1d( 512 , 512 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn10 = nn.BatchNorm1d(batch_size);
self.pool4 = nn.MaxPool1d( 3 , 3 )
self.conv11 = nn.Conv1d( 512 , 512 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output is 1 x 900
self.bn11 = nn.BatchNorm1d(batch_size);
self.conv12 = nn.Conv1d( 512 , 512 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn12 = nn.BatchNorm1d(batch_size);
self.conv13 = nn.Conv1d( 512 , 1 , kernel_size = 3, stride = 1, dilation = 1, groups = 1, bias = True) # output size = [(W−K+2P)/S]+1, W = flattened image length, K = kernel size, P = padding, S = Stride
self.bn13 = nn.BatchNorm1d(batch_size);
self.pool5 = nn.MaxPool1d(3, 3)
self.fc1 = nn.Linear( 147 , 147 , bias = False)
self.bn14 = nn.BatchNorm1d(147);
self.fc2 = nn.Linear( 147 , 147 , bias = False) # originally 1000, 225
self.bn15 = nn.BatchNorm1d(147);
self.fc3 = nn.Linear( 147, 3 ) # originally 225, 3
def forward(self, x):
# This was an attempt to reshape the input for use with batch sizes other than 1
# Transpose data to correct shape -
# Current shape = [embedded dim, batch size, length] = [1, 50, 16207]
# New Shape = [batch size, embedded dim, length] = [50, 1, 16207]
# swap 2nd and 3rd dims
#print('Size of x before transpose', x.size())
#x = torch.transpose(x, 1, 2);
#print('Size of x before resizing into CNN', x.size())
#x_temp = torch.empty(( 1 , batch_size , x.size( dim=1 ) ) ) # empty tensor
#x_temp[ 0 , : , : ] = x; # assign input 'x' values to empty tensor
##x = torch.transpose( x_temp, 0, 1 ); # transpose 'x_temp' and assign it to input 'x'
#x = x_temp; # assign x_temp to input 'x'
#x = x.to(device); # push data to device
# BLOCK 1
#x = self.bn1(x)
#print('x dtype after batchnorm:', x.dtype)
#print('Size of x after batchnorm', x.size())
# define leaky relu Activation Fcn, then apply as shown below
m = nn.LeakyReLU(neg_slope)
x = self.conv1(x)
x = m(x) # apply LeakyReLU
#x = torch.permute(x, (1, 0))
#x = torch.permute(x, (0, 1))
x = self.conv2(x)
x = m(x) # apply LeakyReLU
x = self.pool1(x)
# BLOCK 2
x = self.conv3(x)
x = m(x) # apply LeakyReLU
x = self.conv4(x)
x = m(x) # apply LeakyReLU
x = self.pool2(x)
# BLOCK 3
x = self.conv5(x)
x = m(x) # apply LeakyReLU
x = self.conv6(x)
x = m(x) # apply LeakyReLU
x = self.conv7(x)
x = m(x) # apply LeakyReLU
x = self.pool3(x)
# BLOCK 4
x = self.conv8(x)
x = m(x) # apply LeakyReLU
x = self.conv9(x)
x = m(x) # apply LeakyReLU
x = self.conv10(x)
x = m(x) # apply LeakyReLU
x = self.pool4(x)
# BLOCK 5
x = self.conv11(x)
x = m(x) # apply LeakyReLU
x = self.conv12(x)
x = m(x) # apply LeakyReLU
x = self.conv13(x)
x = m(x) # apply LeakyReLU
x = self.pool5(x)
# BLOCK 6
x = self.fc1(x)
x = m(x) # apply LeakyReLU
x = self.fc2(x)
x = m(x) # apply LeakyReLU
x = self.fc3(x)
return x
```

model = ConvNet().to(device) # Send model to GPU

#if torch.cuda.is_available(): # send model to GPU

# model.cuda()

criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = momentum)

training_loss_np_array = np.zeros([ 1 , num_epochs ]);

loss_np_array_index = 0;

n_total_steps = len(train_loader)

for epoch in range(num_epochs):

```
n_train_samples = len(train_loader)
training_comparison_data = torch.zeros([n_train_samples, 6]) # empty tensor for comparing samples
training_index = 0; # iterable index variable
running_loss = 0.0
for i, (MUSIC_data, labels) in enumerate(train_loader):
MUSIC_data = MUSIC_data.to(device) # send to GPU
labels = labels.to(device) # send to GPU
# Forward pass
outputs = model(MUSIC_data)
#loss = criterion(outputs, labels) # use this for non-augmented losses
loss = torch.sqrt(criterion(outputs, labels)) # changes MSEloss to euclidean distance since MSEloss is (X**2 + Y**2 + Z**2)
# Output Comparison and Predicted vs Actual Sound Source distance calculation
training_predicted = outputs ;
training_comparison_data[training_index, :3 ] = training_predicted ;
training_comparison_data[training_index, 3:6 ] = labels ;
training_index += 1;
# Backward and optimize
loss.backward()
optimizer.step()
optimizer.zero_grad()
running_loss += loss.item()
#training_loss_np_array[ 0 : loss_np_array_index] = loss.detach().numpy();
#loss_np_array_index += 1; # index to next column in training loss array
#print(f'[{epoch + 1}] loss: {running_loss:.3f}')
print(f'[{epoch + 1}] loss: {running_loss :.3f}')
```

training_comparison_data_np = training_comparison_data.detach().numpy(); #convert to numpy array

print(‘Finished Training’)

#PATH = ‘./VGG16_rev_DOE3_Trial3.pth’

#torch.save(model.state_dict(), PATH)

end = time.time() # End Timer

print(‘Total Time to Execute Training[Seconds]’)

print(end - start) # Print Time to Execute Program

print(‘Total Time to Execute Training[minutes]’)

print((end - start)/60) # Print Time to Execute Program

print(‘Total Time to Execute Training[hours]’)

print((end - start)/3600) # Print Time to Execute Program

#%%

# Evaluation Section

dataiter = iter(test_loader)

MUSIC_data, labels = next(dataiter)

testing_running_loss = 0.0

start = time.time() # Begin Timer

with torch.no_grad():

```
n_samples = len(test_loader)
comparison_data = torch.zeros([n_samples, 6]) # empty tensor for comparing samples
index = 0; # iterable index variable
for MUSIC_data, labels in test_loader:
MUSIC_data = MUSIC_data.to(device)
labels = labels.to(device)
outputs = model(MUSIC_data)
testing_loss = torch.sqrt(criterion(outputs, labels)) # changes MSEloss to euclidean distance since MSEloss is (X**2 + Y**2 + Z**2)
# Output Comparison and Predicted vs Actual Sound Source distance calculation
testing_predicted = outputs ;
#distance = torch.sqrt( (outputs[0] - labels[0] )**2 + (outputs[1] - labels[1] )**2 + ( outputs[2] - labels[2] )**2 );
comparison_data[index, :3 ] = testing_predicted ;
comparison_data[index, 3:6 ] = labels ;
#comparison_data[index, 6] = distance
index += 1;
testing_running_loss += testing_loss.item()
print(' Testing loss: {testing_running_loss :.3f}')
```

# Convert data to numpy

comparison_data_np_array = comparison_data.numpy();

comparison_data_np_array = comparison_data.numpy() # convert tensor to np array

#savetxt(‘predicted_SSL_vs_real_SSL_CNN3_ngrid_16200_LR0.0005_LeakyRelu0.4_epochs150_5.15.24.csv’, comparison_data_np_array, delimiter=‘,’, header = ‘Predicted XYZ, Real XYZ, Euclidean Distance’)

#savetxt(‘DOE_Trial_16_Test_data_Loss_CNN3_ngrid_16200_LR0.0005_LeakyRelu0.4_epochs150_5.15.24.csv’, comparison_data_np_array, delimiter=‘,’, header = ‘Predicted XYZ, Real XYZ, Euclidean Distance’)

end = time.time() # End Timer

print(‘Total Time to Execute Testing[Seconds]’)

print(end - start) # Print Time to Execute Program

print(‘Total Time to Execute Testing[minutes]’)

print((end - start)/60) # Print Time to Execute Program

print(‘Total Time to Execute Testing[hours]’)

print((end - start)/3600) # Print Time to Execute Program

```
#acc = 100.0 * n_correct2 / n_samples
#print(f'Accuracy of the loaded model: {acc} %')
```