# The size of tensor a (32) must match the size of tensor b (8) at non-singleton dimension 0 error

I was training my model when this error surfaced.

``````<ipython-input-14-b4c5dff1539c> in <module>()
1 model_conv = train_model(model_conv,train_dl,val_dl, criterion, optimizer_conv,
----> 2                          exp_lr_scheduler, num_epochs=25)

<ipython-input-13-f484d720bf26> in train_model(model, train_dl, val_dl, criterion, optimizer, scheduler, num_epochs)
57            #stats
58            running_validation_loss += loss_val.item()* image.size(0)
---> 59            running_corrects_validation += torch.sum(preds_val == labels)
60
61

The size of tensor a (32) must match the size of tensor b (8) at non-singleton dimension 0
``````

The error was directed at this code snippet

``````for image,label in val_dl:
model.eval()
image,label = image.to(device), label.to(device)

outputs = model(image)
_, preds_val = torch.max(outputs, 1)
loss_val = criterion(outputs, label)

#stats
running_validation_loss += loss_val.item()* image.size(0)
----> running_corrects_validation += torch.sum(preds_val == labels)
``````

So I figured it may be something wrong with the dimensions of my tensor, hence I decided to test out this theory

Input:

``````for images,labels in train_dl:
output = model_conv(images)
loss = F.cross_entropy(output,labels)
_,preds_vals = torch.max(output,1)
accuracy = torch.sum(preds_vals==labels)

print(f"Output: {output.size()}" )
print(f"labels: {labels.size()}")
print(f"preds_vals: {preds_vals.size()}" )
print(f"accuracy: {accuracy}")
break
``````

Output:

``````Output: torch.Size([32, 10])
labels: torch.Size([32])
preds_vals: torch.Size([32])
accuracy: 1
``````

Nothing seems to be off; however, after researching quite a bit, I think the loss function may be causing all of these problems. Does anybody have an idea why this is happening? My full code is listed below.

Full code

``````data = download_url("https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz",".")

with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
tar.extractall(path='./data')

if os.path.exists("/content/data/cifar10/validate") is False:

os.makedirs("/content/data/cifar10/validate")

os.makedirs("/content/data/cifar10/validate/airplane")

os.makedirs("/content/data/cifar10/validate/automobile")

os.makedirs("/content/data/cifar10/validate/bird")

os.makedirs("/content/data/cifar10/validate/cat")

os.makedirs("/content/data/cifar10/validate/deer")

os.makedirs("/content/data/cifar10/validate/dog")

os.makedirs("/content/data/cifar10/validate/frog")

os.makedirs("/content/data/cifar10/validate/horse")

os.makedirs("/content/data/cifar10/validate/ship")

os.makedirs("/content/data/cifar10/validate/truck")

for i in sample(glob.glob("/content/data/cifar10/train/airplane/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/airplane")

for i in sample(glob.glob("/content/data/cifar10/train/automobile/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/automobile")

for i in sample(glob.glob("/content/data/cifar10/train/bird/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/bird")

for i in sample(glob.glob("/content/data/cifar10/train/cat/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/cat")

for i in sample(glob.glob("/content/data/cifar10/train/deer/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/deer")

for i in sample(glob.glob("/content/data/cifar10/train/dog/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/dog")

for i in sample(glob.glob("/content/data/cifar10/train/frog/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/frog")

for i in sample(glob.glob("/content/data/cifar10/train/horse/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/horse")

for i in sample(glob.glob("/content/data/cifar10/train/ship/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/ship")

for i in sample(glob.glob("/content/data/cifar10/train/truck/*.png"),500):
shutil.move(i,"/content/data/cifar10/validate/truck")

from torch.utils.data.dataset import random_split
mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.25, 0.25, 0.25])

transform =  transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean, std)
])

train_ds = ImageFolder("/content/data/cifar10/train",transform)
val_ds = ImageFolder("/content/data/cifar10/validate", transform)

train_dl = DataLoader(train_ds, batch_size = 32, shuffle=True, num_workers=2, pin_memory=True)
val_dl  =  DataLoader(val_ds,batch_size = 32,shuffle = False, num_workers=2, pin_memory=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, train_dl , val_dl, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
running_training_loss = 0.0
running_validation_loss = 0.0
running_corrects_training = 0
running_corrects_validation = 0

for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)

#training loop
for images,labels in train_dl:

images,labels = images.to(device), labels.to(device)

model.train(True):
outputs = model(images)
_, preds_train = torch.max(outputs, 1)
loss_train = criterion(outputs, labels)

loss_train.backward()
optimizer.step()

#stats
running_training_loss += loss_train.item()
running_corrects_training += torch.sum(preds_train == labels.data)

#scheduler step
scheduler.step()

#stats
epoch_loss_training = running_training_loss / len(train_dl)
epoch_acc_training = running_corrects_training.double() / len(train_dl)

#validation loop
for image,label in val_dl:
image,label = image.to(device), label.to(device)

model.eval():
outputs = model(image)
_, preds_val = torch.max(outputs, 1)
loss_val = criterion(outputs, label)

#stats
running_validation_loss += loss_val.item()
running_corrects_validation += torch.sum(preds_val == labels.data)

epoch_loss_validation = running_validation_loss / len(val_dl)
epoch_acc_validation = running_corrects_validation.double() / len(val_dl)

print("epoch {}, epoch training loss {}, epoch training acc {}, epoch_loss_validation{}, epoch_acc_validation" .format(epoch, epoch_loss_training, epoch_acc_training,
epoch_loss_validation, epoch_acc_validation))

#finding best accuracy
if running_corrects_validation > best_acc:
best_acc = running_corrects_validation
best_model_wts = copy.deepcopy(model.state_dict())
print()

# total time
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))

return model

model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():

#defining parameters

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 10)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv,train_dl, val_dl, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=25)
``````

In your training loop you define `images, labels` and in your validation loop you define `image, label` (no "s" at the end of label).

Yet, when you do

`running_corrects_validation += torch.sum(preds_val == labels.data)`

You are using βlabelsβ with an βsβ.

1 Like