RuntimeError: stack expects each tensor to be equal size,

How to solve this error? Can someone specifically give me a good example of why this error would happen and how to fix it?
Here is the Error: RuntimeError: stack expects each tensor to be equal size,

The error is raised if the shape of all samples creating the batch differs.
Here is a small example:

class MyDataset(Dataset):
    def __init__(self, transform=None):
        self.data = [torch.randn(3, 224, 224), torch.randn(3, 250, 250)]
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        x = self.data[index]
        if self.transform:
            x = self.transform(x)
        return x

dataset = MyDataset()

# works
loader = DataLoader(dataset, batch_size=1)
for img in loader:
    print(img.shape)
# torch.Size([1, 3, 224, 224])
# torch.Size([1, 3, 250, 250])

# fails
loader = DataLoader(dataset, batch_size=2)
for img in loader:
    print(img.shape)
# RuntimeError: stack expects each tensor to be equal size, but got [3, 224, 224] at entry 0 and [3, 250, 250] at entry 1

# works again
dataset = MyDataset(transform=transforms.Resize((224, 224)))
loader = DataLoader(dataset, batch_size=2)
for img in loader:
    print(img.shape)
# torch.Size([2, 3, 224, 224])

You could resize or pad the samples to create tensors in the same shape.

Here is my code for Music Genre Classification ( I cite Valerio Velardo’s Music Genre classification code and PyTorch website)
import torch
import torchaudio
from torch.utils.data import DataLoader
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

Global Variables

batch_size = 20
n_mfcc = 40

Gathering & Loading Data / Device

train_data = torchaudio.datasets.GTZAN(
root=“MUSIC_CLIPS”,
url=“http://opihi.cs.uvic.ca/sound/genres.tar.gz”,
folder_in_archive=“/Users/shivakannan/PycharmProjects/MUSIC_GENRE_CLASSIFICATION_project/genres_original”,
download=True,
subset=“training”
)

test_data = torchaudio.datasets.GTZAN(
root=“MUSIC_CLIPS”,
url=“http://opihi.cs.uvic.ca/sound/genres.tar.gz”,
folder_in_archive=“/Users/shivakannan/PycharmProjects/MUSIC_GENRE_CLASSIFICATION_project/genres_original”,
download=True,
subset=“testing”
)

train_load = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_load = DataLoader(test_data, batch_size=batch_size, shuffle=True)

device = torch.device(“mps”)

Dictionary for mapping

labels = {

'blues': torch.tensor(0, dtype=torch.float32),
'classical': torch.tensor(1, dtype=torch.float32),
'country': torch.tensor(2, dtype=torch.float32),
'disco': torch.tensor(3, dtype=torch.float32),
'hiphop': torch.tensor(4, dtype=torch.float32),
'jazz': torch.tensor(5, dtype=torch.float32),
'metal': torch.tensor(6, dtype=torch.float32),
'pop': torch.tensor(7, dtype=torch.float32),
'reggae': torch.tensor(8, dtype=torch.float32),
'rock': torch.tensor(9, dtype=torch.float32)

}

Extracting waveforms

items_train =
X_train =
y_train =
for ii in range(443):
items_train.append(torchaudio.datasets.GTZAN.getitem(train_data, n=ii))
X_train.append(items_train[ii][0][0][0:660000])
y_train.append(labels[items_train[ii][2]])

items_test =
X_test =
y_test =
for ii in range(290):
items_test.append(torchaudio.datasets.GTZAN.getitem(test_data, n=ii))
X_test.append(items_test[ii][0][0][0:660000])
y_test.append(labels[items_test[ii][2]])

X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

EVERYTHING WORKS TILL NOW

num_segments = 100

Extracting MFCCs

MFCC_transform = torchaudio.transforms.MFCC(
sample_rate=22050,
n_mfcc=n_mfcc,
norm=“ortho”,
log_mels=False,
)

MFCC_train =

for ii in range(443):

temp = []
for jj in range(num_segments):

    start_idx = jj * 6600
    end_idx = start_idx + 6600

    signal = X_train[ii][start_idx:end_idx]
    temp.append(MFCC_transform(signal))

MFCC_train.append(temp)

MFCC_train = torch.from_numpy(np.array(MFCC_train))

Everything Works Till Here

MFCC_test =

for ii in range(290):

temp = []
for jj in range(num_segments):

    start_idx = jj * 6600
    end_idx = start_idx + 6600

    signal = X_test[ii][start_idx:end_idx]
    temp.append(MFCC_transform(signal))

MFCC_test.append(temp)

MFCC_test = torch.from_numpy(np.array(MFCC_test))

MFCCs for Training and Testing have been Obtained

Expected Outputs for Training and Testing have been Obtained

y_train_use = torch.zeros((443, 100))
y_test_use = torch.zeros((290, 100))

for ii in range(443):

for jj in range(100):

    y_train_use[ii][jj] = y_train[ii]

for ii in range(290):

for jj in range(100):

    y_test_use[ii][jj] = y_test[ii]

Everything Works Till Here

Neural Network Class

class NN(nn.Module):

def __init__(self):

    super().__init__()

    self.flatten = nn.Flatten()

    self.linear_relu_stack = nn.Sequential(

        nn.Linear(40*34, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10)

    )

def forward(self, x):

    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

model = NN().to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)

Everything Works Till Here

loss_list =
idx_list =
def train(model, dataloader):

count = 0
size = len(dataloader.dataset)
model.train()

for ii in range(443):

    for jj in range(5):

        idx_list.append(count)
        X = MFCC_train[ii][jj * batch_size : jj * batch_size + batch_size]
        #print("XSIZE", X.size())
        y = np.array(y_train_use[ii][jj * batch_size : jj * batch_size + batch_size])
        X = X.to(device)
        y = torch.from_numpy(y).to(device)

        pred = model(X)

        loss = loss_fn(pred, y)
        #print("LOSS", type(loss))

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()


        loss_list.append(loss.item())

        count += 1


plt.plot(np.array(idx_list), np.array(loss_list))
plt.show()

train(model, train_load)

The plot for loss for training fluctuates so much.
Here is the plot:
Figure_1

Please explain me possible issues and ways to improve this plot.