RuntimeError: mat1 and mat2 shapes cannot be multiplied (1536x16 and 1536x1000)

I recently copied this code from an old ebook to practice PyTorch Lightning, but it seems buggy to me. I hope someone can help me with it.

#!/usr/bin/python
import os
import torch
import torchvision
import torchmetrics
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from PIL import Image
from pytorch_lightning.callbacks import ModelCheckpoint

class LoadCancerDataset(torch.utils.data.Dataset):
    def __init__(self, data_folder, img_idx, transform = torchvision.transforms.Compose([
        torchvision.transforms.CenterCrop(32), torchvision.transforms.ToTensor() ]), dict_labels = {} ):
        self.data_folder = data_folder
        #self.list_image_files = [s for s in os.listdir(data_folder)]
        self.list_image_files = img_idx
        self.transform = transform
        self.dict_labels = dict_labels
        self.labels = [ dict_labels[img.split('.')[0]] for img in self.list_image_files ]
    def __len__(self):
        return len(self.list_image_files)
    def __getitem__(self, idx):
        image_name = os.path.join(self.data_folder, self.list_image_files[idx])
        image = Image.open(image_name)
        image = self.transform(image)
        image_short_name = self.list_image_files[idx].split('.')[0]
        label = self.dict_labels[image_short_name]
        return image, label
    
class CNNImageClassifier(pl.LightningModule):
    def __init__(self, learning_rate = 0.001): #learning rate too high: loss drops quickly but may not converge; too low: converges but training takes longer
        super().__init__()
        self.learning_rate = learning_rate
        #input_size = (256, 3, 32, 32)
        self.conv_layer1 = torch.nn.Conv2d(in_channels = 3, out_channels = 3, kernel_size = 3, stride = 1, padding = 1)
        #output_size = (256, 3, 32, 32)
        self.relu1 = torch.nn.ReLU()
        #output_size = (256, 3, 32, 32)
        self.pool = torch.nn.MaxPool2d(kernel_size = 2)
        #output_size = (256, 3, 16, 16)
        self.conv_layer2 = torch.nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size = 3, stride = 1, padding = 1)
        #output_size = (256, 6, 16, 16)
        self.relu2 = torch.nn.ReLU()
        
        self.fully_connected_1 = torch.nn.Linear(in_features = 16 * 16 * 6, out_features = 1000)
        self.fully_connected_2 = torch.nn.Linear(in_features = 1000, out_features = 500)
        self.fully_connected_3 = torch.nn.Linear(in_features = 500, out_features = 250)
        self.fully_connected_4 = torch.nn.Linear(in_features = 250, out_features = 120)
        self.fully_connected_5 = torch.nn.Linear(in_features = 120, out_features = 60) 
        self.fully_connected_6 = torch.nn.Linear(in_features = 60, out_features = 2)
        
        self.loss = torch.nn.CrossEntropyLoss()
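        #note: CrossEntropyLoss expects raw logits of shape [batch_size, num_classes]
        #and integer class targets of shape [batch_size]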
        
    def forward(self, input):
        output = self.conv_layer1(input)
        output = self.relu1(output)
        output = self.pool(output)
        output = self.conv_layer2(output)
        output = self.relu2(output)
        
        #flatten to one dimension before passing the activation to the linear layers
        output.view(-1, 6 * 16 * 16)
        
        output = self.fully_connected_1(output)
        output = self.fully_connected_2(output)
        output = self.fully_connected_3(output)
        output = self.fully_connected_4(output)
        output = self.fully_connected_5(output)
        output = self.fully_connected_6(output)
        
        return output
    
    #reduce loss & converge
    def configure_optimizers(self):
        params = self.parameters()
        optimizer = torch.optim.Adam(params = params, lr = self.learning_rate)
        return optimizer

    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        inputs, targets = inputs.to('cuda:0'), targets.to('cuda:0')
        outputs = self(inputs)
        train_accuracy = torchmetrics.functional.accuracy(task = 'binary', preds = outputs, target = targets) #other task options: 'multiclass', 'multilabel'
        loss = self.loss(outputs, targets)
        self.log('train_accuracy', train_accuracy, prog_bar = True)
        self.log('train_loss', loss)
        return { 'train_accuracy' : train_accuracy, 'loss' : loss }
    
    def test_step(self, batch, batch_idx):
        inputs, targets = batch
        inputs, targets = inputs.to('cuda:0'), targets.to('cuda:0')
        outputs = self.forward(inputs)
        test_accuracy = torchmetrics.functional.accuracy(task = 'binary', preds = outputs, target = targets)
        loss = self.loss(outputs, targets)
        self.log('test_accuracy', test_accuracy)
        return { 'test_accuracy' : test_accuracy, 'test_loss' : loss }

np.random.seed(0)
selected_image_list = []
train_imgs_orig = os.listdir('/home/pi/Downloads/histopathologic-cancer-detection/train')
for img in np.random.choice(train_imgs_orig, 1000): #10000
    selected_image_list.append(img)
print('Length of images list:', len(selected_image_list))

np.random.seed(0)
np.random.shuffle(selected_image_list)
cancer_train_idx = selected_image_list[:800] #8000
cancer_test_idx = selected_image_list[800:] #2000
print('Number of samples in training dataset:', len(cancer_train_idx))
print('Number of samples in testing dataset:', len(cancer_test_idx))

df_labels = pd.read_csv('/home/pi/Downloads/histopathologic-cancer-detection/train_labels.csv')

image_label_dict = {}
for img in selected_image_list:
    #print('Image:', img.split('.')[0], '==', img.split('.')[1])
    label_tuple = df_labels.loc[df_labels['id'] == img.split('.')[0]]
    image_label_dict[label_tuple['id'].values[0]] = label_tuple['label'].values[0]
    #print(label_tuple['id'].values[0], '==',label_tuple['label'].values[0])
    
#new_train_labels = pd.DataFrame({ 'id' : [], 'label' : [] })

id_list = []
label_list = []

for img in cancer_train_idx:
    label_tuple = df_labels.loc[ df_labels['id'] == img.split('.')[0]]
    id_list.append(label_tuple['id'].values[0])
    label_list.append(label_tuple['label'].values[0])
    
new_train_labels = pd.DataFrame({ 'id' : id_list, 'label' : label_list })
new_train_labels.to_csv('train_labels.csv', index = False)

print('Image Label Dicts Length:', len(image_label_dict))

data_train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.CenterCrop(32),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.ToTensor()
    ])

data_test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.CenterCrop(32),
    torchvision.transforms.ToTensor()
    ])

data_folder = '/home/pi/Downloads/histopathologic-cancer-detection/train'
train_dataset = LoadCancerDataset(
    data_folder, cancer_train_idx, transform = data_train_transforms, dict_labels = image_label_dict)

test_dataset = LoadCancerDataset(
    data_folder, cancer_test_idx, transform = data_test_transforms, dict_labels = image_label_dict)

#batch_size = 256
batch_size = 16 #with this batch size, each sample contributes 6 * 16 = 96 rows to mat1 in the error above, hence 16 * 96 = 1536
workers = 2
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = batch_size,
    num_workers = workers,
    pin_memory = True
    )

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = batch_size,
    num_workers = workers,
    pin_memory = True
    )

if __name__ == '__main__':
    checkpoint_callback = ModelCheckpoint()
    
    model = CNNImageClassifier()
    
    trainer = pl.Trainer(max_epochs = 500, progress_bar_refresh_rate = 50, gpus = -1)
    
    trainer.fit(model, train_dataloaders = train_dataloader)
    
    print('Model Saved Location:', checkpoint_callback.best_model_path)

The error is most likely raised in a linear layer receiving an input activation in a wrong shape. Could you properly format the code and post the full error message including the stacktrace, please?
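
For illustration, here is a minimal standalone sketch (random tensors with the shapes from the model above, not your actual data) that reproduces this class of mismatch:

import torch

fc = torch.nn.Linear(in_features = 16 * 16 * 6, out_features = 1000)
activation = torch.randn(16, 6, 16, 16) #[batch_size, channels, height, width]

try:
    fc(activation) #Linear multiplies against the last dim (16), not 16 * 16 * 6
except RuntimeError as e:
    print(e) #mat1 and mat2 shapes cannot be multiplied (1536x16 and 1536x1000)

out = fc(activation.view(activation.size(0), -1)) #flatten to [16, 1536] first
print(out.shape) #torch.Size([16, 1000])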

OK, done. I’ve formatted the code in my post above and attached a full screenshot of the errors here.

I hope it helps you identify the problem.

Thanks for the code! It’s still not executable, so I removed the unnecessary data loading and Lightning usage. Afterwards I was able to reproduce the issue: the error is caused by the missing assignment in:

output.view(-1, 6 * 16 * 16)

use:

output = output.view(-1, 6 * 16 * 16)
# or better
output = output.view(output.size(0), -1)

and it’ll work.
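
As a side note, one way to avoid this easy-to-miss assignment altogether is to register a torch.nn.Flatten module; a sketch against the layer names from the post above:

self.flatten = torch.nn.Flatten() #in __init__; flattens all dims except the batch dim by default

output = self.flatten(output) #in forward, replacing the view call: [16, 6, 16, 16] -> [16, 1536]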
Also, screenshots are quite hard to read, so please post the stacktraces as code snippets next time.


It worked, but then another error came up, ‘Predictions and targets are expected to have the same shape, but got torch.Size([16, 2]) and torch.Size([16])’, on my next run.

I’ve attached a screenshot of the error here and hope you can help me. :) Thank you.

Dear ptrblck,

Never mind, I already solved the problem in my post above by reverting torchmetrics to version 0.6.0 (previously 1.0.3), so the accuracy call works without the task argument:

def training_step(self, batch, batch_idx):
    ...
    train_accuracy = torchmetrics.functional.accuracy(outputs, targets)
    ...
    return { 'train_accuracy' : train_accuracy, 'loss' : loss }
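
For what it’s worth, staying on torchmetrics 1.x should also work when the call names the task explicitly; a sketch, assuming two output logits per sample as in the model above:

train_accuracy = torchmetrics.functional.accuracy(
    preds = outputs,   #[batch_size, 2] logits
    target = targets,  #[batch_size] integer class labels
    task = 'multiclass',
    num_classes = 2
    )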
    

BTW, thank you a lot for your help.