How to resolve runtime error due to tensor size mismatch?

I am trying to implement a simple autoencoder using PyTorch. My dataset consists of 256x256x3 images, and I have built a data loader. When I run the autoencoder, I get a runtime error:
size mismatch, m1: [76800 x 256], m2: [784 x 128] at /Users/soumith/minicondabuild3/conda-bld/pytorch_1518371252923/work/torch/lib/TH/generic/THTensorMath.c:1434

These are my hyper-parameters:

batch_size = 100
learning_rate = 1e-3
num_epochs = 100

Following is the architecture of my auto-encoder:

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(3*256*256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3))
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.Linear(128, 3*256*256),
            nn.ReLU())

    def forward(self, x):
        x = self.encoder(x)
        #x = self.decoder(x)
        return x
This is the code I used to run my model:

for epoch in range(num_epochs):
    for data in dataloader:
        img = data['image']
        img = Variable(img)
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.data[0]))
    if epoch % 10 == 0:
        pic = show_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.jpg'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')

Can someone help me understand what the issue is?

Your model seems to be fine. Try to flatten the input.
I assume your image has the following dimensions: [batch_size, 3, 256, 256].
Add x.view(x.size(0), -1) to your forward method:

def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.encoder(x)
        x = self.decoder(x)
        return x
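As a side note (this part is an assumption based on the posted training loop, not something stated in the thread): with the decoder enabled, the output stays flattened as [batch_size, 3*256*256], while criterion(output, img) receives the unflattened image. One sketch of keeping the shapes consistent is to reshape the reconstruction back at the end of forward:

def forward(self, x):
        x = x.view(x.size(0), -1)              # flatten [batch, 3, 256, 256] -> [batch, 3*256*256]
        x = self.encoder(x)
        x = self.decoder(x)
        return x.view(x.size(0), 3, 256, 256)  # reshape back so the loss compares image-shaped tensors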

Thanks @ptrblck! That resolved my issue. :slight_smile:

import torch
import numpy as np
import torch.backends.cudnn as cudnn
import os
from tqdm import tqdm
from net.models import deeplabv3plus
from sklearn.metrics import accuracy_score
from net import loss
import matplotlib as mpl
mpl.use('Agg')
from apex import amp
import matplotlib.pyplot as plt
from tensorboardX import SummaryWriter
from dataset.my_datasets import MyDataSet_seg, MyValDataSet_seg
from torch.utils import data

model_urls = {'deeplabv3plus_xception': "/content/drive/MyDrive/Thesis/MB-DCNN-master/MB-DCNN-master/models/deeplabv3plus_xception_VOC2012_epoch46_all.pth"}

INPUT_SIZE = '224, 224'
w, h = map(int, INPUT_SIZE.split(','))
LEARNING_RATE = 0.0001
MOMENTUM = 0.9
POWER = 0.9
WEIGHT_DECAY = 0.0005
NUM_CLASSES = 1
TRAIN_NUM = 2000
BATCH_SIZE = 16
EPOCH = 500
STEPS = (TRAIN_NUM/BATCH_SIZE)*EPOCH
FP16 = True
NAME = 'DR_CoarseSN/'

def lr_poly(base_lr, iter, max_iter, power):
    return base_lr * ((1 - float(iter) / max_iter) ** (power))

def adjust_learning_rate(optimizer, i_iter):
    lr = lr_poly(LEARNING_RATE, i_iter, STEPS, POWER)
    optimizer.param_groups[0]['lr'] = lr
    return lr

def val_mode_seg(valloader, model, path, epoch):
    dice = []
    sen = []
    spe = []
    acc = []
    jac_score = []
    for index, batch in enumerate(valloader):
        data, mask, name = batch
        data = data.cuda()
        mask = mask[0].data.numpy()
        val_mask = np.int64(mask > 0)
        # print(name)

        model.eval()
        with torch.no_grad():
            pred = model(data)

        pred = torch.softmax(pred, dim=1).cpu().data.numpy()
        pred_arg = np.argmax(pred[0], axis=0)

        #y_pred
        y_true_f = val_mask.reshape(val_mask.shape[0]*val_mask.shape[1], order='F')
        y_pred_f = pred_arg.reshape(pred_arg.shape[0]*pred_arg.shape[1], order='F')

        intersection = np.float(np.sum(y_true_f * y_pred_f))
        dice.append((2. * intersection) / (np.sum(y_true_f) + np.sum(y_pred_f)))
        sen.append(intersection / np.sum(y_true_f))
        intersection0 = np.float(np.sum((1 - y_true_f) * (1 - y_pred_f)))
        spe.append(intersection0 / np.sum(1 - y_true_f))
        acc.append(accuracy_score(y_true_f, y_pred_f))
        jac_score.append(intersection / (np.sum(y_true_f) + np.sum(y_pred_f) - intersection))

        if index in [100]:
            fig = plt.figure()
            ax = fig.add_subplot(131)
            ax.imshow(data[0].cpu().data.numpy().transpose(1, 2, 0))
            ax.axis('off')
            ax = fig.add_subplot(132)
            ax.imshow(mask)
            ax.axis('off')
            ax = fig.add_subplot(133)
            ax.imshow(pred_arg)
            ax.axis('off')
            fig.suptitle('RGB image, ground truth mask, predicted mask', fontsize=6)
            fig.savefig(path + name[0][:-4] + '_e' + str(epoch) + '.png', dpi=200, bbox_inches='tight')
            ax.cla()
            fig.clf()
            plt.close()

    return np.array(acc), np.array(dice), np.array(sen), np.array(spe), np.array(jac_score)

def Jaccard(pred_arg, mask):
    pred_arg = np.argmax(pred_arg.cpu().data.numpy(), axis=1)
    mask = mask.cpu().data.numpy()
    y_true_f = mask.reshape(mask.shape[0] * mask.shape[1] * mask.shape[2], order='F')
    y_pred_f = pred_arg.reshape(pred_arg.shape[0] * pred_arg.shape[1] * pred_arg.shape[2], order='F')
    intersection = np.float(np.sum(y_true_f * y_pred_f))
    jac_score = intersection / (np.sum(y_true_f) + np.sum(y_pred_f) - intersection)
    return jac_score

def main():
    """Create the network and start the training."""
    writer = SummaryWriter('models/' + NAME)
    cudnn.enabled = True

    ############# Create coarse segmentation network
    model = deeplabv3plus(num_classes=NUM_CLASSES)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    model.cuda()
    if FP16 is True:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    model = torch.nn.DataParallel(model)

    ############# Load pretrained weights
    pretrained_dict = torch.load(model_urls['deeplabv3plus_xception'])
    net_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in net_dict) and (v.shape == net_dict[k].shape)}
    net_dict.update(pretrained_dict)
    model.load_state_dict(net_dict)
    print(len(net_dict))
    print(len(pretrained_dict))

    model.train()
    model.float()

    DR_loss = loss.Fusin_Dice_rank()
    cudnn.benchmark = True

    ############# Load training and validation data
    data_train_root = '/content/drive/MyDrive/Thesis/MB-DCNN-master/MB-DCNN-master/dataset/seg_data/Training_resize_seg/'
    data_train_list = '/content/drive/MyDrive/Thesis/MB-DCNN-master/MB-DCNN-master/dataset/ISIC/Training_seg.txt'
    trainloader = data.DataLoader(MyDataSet_seg(data_train_root, data_train_list, crop_size=(w, h)),
                                  batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)
    data_val_root = '/content/drive/MyDrive/Thesis/MB-DCNN-master/MB-DCNN-master/dataset/seg_data/ISIC-2017_Validation_Data/'
    data_val_list = '/content/drive/MyDrive/Thesis/MB-DCNN-master/MB-DCNN-master/dataset/ISIC/Validation_seg.txt'
    valloader = data.DataLoader(MyValDataSet_seg(data_val_root, data_val_list), batch_size=1, shuffle=False, num_workers=8,
                                pin_memory=True)

    path = 'models/' + NAME
    if not os.path.isdir(path):
        os.mkdir(path)
    f_path = path + 'outputxx.txt'
    val_jac = []

    ############# Start the training
    for epoch in range(EPOCH):
        train_loss_D = []
        train_loss_R = []
        train_loss_total = []
        train_jac = []

        for i_iter, batch in tqdm(enumerate(trainloader)):
            step = (TRAIN_NUM/BATCH_SIZE)*epoch+i_iter
            images, labels, name = batch
            images = images.cuda()
            labels = labels.cuda().squeeze(1)

            optimizer.zero_grad()
            lr = adjust_learning_rate(optimizer, step)
            model.train()
            preds = model(images)
            print(preds.shape)
            print(labels.shape)

            loss_D, loss_R = DR_loss(preds, labels)
            term = loss_D + 0.05 * loss_R

            if FP16 is True:
                with amp.scale_loss(term, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                term.backward()
            optimizer.step()

            writer.add_scalar('learning_rate', lr, step)
            writer.add_scalar('loss', term.cpu().data.numpy(), step)

            train_loss_D.append(loss_D.cpu().data.numpy())
            train_loss_R.append(loss_R.cpu().data.numpy())
            train_loss_total.append(term.cpu().data.numpy())
            train_jac.append(Jaccard(preds, labels))

        print("train_epoch%d: lossTotal=%f, lossDice=%f, lossRank=%f, Jaccard=%f \n" % (epoch, np.nanmean(train_loss_total), np.nanmean(train_loss_D), np.nanmean(train_loss_R), np.nanmean(train_jac)))

        ############# Start the validation
        [vacc, vdice, vsen, vspe, vjac_score] = val_mode_seg(valloader, model, path, epoch)
        line_val = "val%d: vacc=%f, vdice=%f, vsensitivity=%f, vspecifity=%f, vjac=%f \n" % \
                   (epoch, np.nanmean(vacc), np.nanmean(vdice), np.nanmean(vsen), np.nanmean(vspe),
                    np.nanmean(vjac_score))
        print(line_val)
        f = open(f_path, "a")
        f.write(line_val)

        ############# Plot val curve
        val_jac.append(np.nanmean(vjac_score))
        plt.figure()
        plt.plot(val_jac, label='val jaccard', color='blue', linestyle='--')
        plt.legend(loc='best')
        plt.savefig(os.path.join(path, 'jaccard.png'))
        plt.clf()
        plt.close()
        plt.show()
        plt.close('all')

        writer.add_scalar('val_Jaccard', np.nanmean(vjac_score), epoch)

        ############# Save network
        torch.save(model.state_dict(), path + 'CoarseSN_e' + str(epoch) + '.pth')

if __name__ == '__main__':
    main()

I am getting this error:

RuntimeError: The size of tensor a (50176) must match the size of tensor b (150528) at non-singleton dimension 1

It is raised at line 181:

loss_D, loss_R = DR_loss(preds, labels)

I don't know how Fusin_Dice_rank is implemented, but it seems to raise the shape mismatch.
Print the shapes of all tensors and make sure the internal operations in this loss function can be executed.

PS: you can post code snippets by wrapping them into three backticks ```.
Also, apex.amp is deprecated, so use torch.cuda.amp in newer PyTorch versions.
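For illustration, a minimal sketch of how the training step in the posted script could look with the native torch.cuda.amp API instead of apex.amp (model, optimizer, trainloader, and DR_loss are assumed to be defined exactly as in the script above):

import torch

scaler = torch.cuda.amp.GradScaler()

for images, labels, name in trainloader:
    images, labels = images.cuda(), labels.cuda().squeeze(1)
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():      # run the forward pass in mixed precision
        preds = model(images)
        loss_D, loss_R = DR_loss(preds, labels)
        term = loss_D + 0.05 * loss_R
    scaler.scale(term).backward()        # scale the loss to avoid float16 gradient underflow
    scaler.step(optimizer)               # unscale gradients and take the optimizer step
    scaler.update()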

import torch
import torch.nn as nn

def dice_loss(predict, target):
    smooth = 1e-5
    y_true_f = target.contiguous().view(target.shape[0], -1)
    y_pred_f = predict.contiguous().view(predict.shape[0], -1)
    intersection = torch.sum(torch.mul(y_pred_f, y_true_f), dim=1)
    union = torch.sum(y_pred_f, dim=1) + torch.sum(y_true_f, dim=1) + smooth
    dice_score = (2.0 * intersection / union)
    dice_loss = 1 - dice_score
    return dice_loss

def rank_loss(predict, target):
    top_k = 30
    y_true_f = target.contiguous().view(target.shape[0], -1)
    y_pred_f = predict.contiguous().view(predict.shape[0], -1)
    N_topvalue, N_indice = (y_pred_f * (1 - y_true_f)).topk(top_k, dim=-1, largest=True, sorted=True)
    P_values, P_indice = ((1.0 - y_pred_f) * y_true_f).topk(top_k, dim=-1, largest=True, sorted=True)
    P_downvalue = 1 - P_values
    beta = 1
    rank_loss = 0
    for i in range(top_k):
        for j in range(top_k):
            th_value = N_topvalue[:, i] - beta * P_downvalue[:, j] + 0.3
            rank_loss = rank_loss + (th_value * (th_value > 0).float()).mean()
    return rank_loss/(top_k * top_k)

class Fusin_Dice_rank(nn.Module):
    def __init__(self):
        super(Fusin_Dice_rank, self).__init__()

    def forward(self, predicts, target):
        preds = torch.softmax(predicts, dim=1)
        dice_loss0 = dice_loss(preds[:, 0, :, :], 1 - target)
        dice_loss1 = dice_loss(preds[:, 1, :, :], target)
        loss_D = (dice_loss0.mean() + dice_loss1.mean())/2.0
        loss_R = rank_loss(preds[:, 1, :, :], target)
        return loss_D, loss_R

This is the loss function that is being used.

torch.Size([16, 1, 224, 224])
torch.Size([16, 224, 224, 3])

These are the sizes of the tensors (preds and labels, respectively).

Assuming you've given the shapes of the model output and target, respectively, I don't think they are valid, since e.g.:

preds = torch.softmax(predicts, dim=1)

wouldn't make sense on a predicts tensor of shape [16, 1, 224, 224], since only a single class is used.
The target also seems to be in a manually created channels-last memory format, which also sounds wrong.

In any case, the error is raised in dice_loss, since you are flattening two tensors with a different number of elements:

y_true_f = target.contiguous().view(target.shape[0], -1)
y_pred_f = predict.contiguous().view(predict.shape[0], -1)

which will then yield the shape mismatch in dim1 in:

intersection = torch.sum(torch.mul(y_pred_f, y_true_f), dim=1)
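For reference, a minimal sketch of shapes that would work with this loss, under the assumption that this is a binary segmentation task (two output channels for the model and a single-channel, channels-first target); the permute and channel selection below are assumptions about how the labels are stored, not part of the original repo:

import torch

preds = torch.randn(16, 2, 224, 224)                      # [batch, num_classes, H, W], as softmax(dim=1) expects
labels = torch.randint(0, 2, (16, 224, 224, 3)).float()   # channels-last target, as printed above

# if the three channels just replicate the same binary mask, keep one channel
# and drop the channels-last layout so the target matches the prediction spatially
labels = labels.permute(0, 3, 1, 2)[:, 0]                 # -> [16, 224, 224]

probs = torch.softmax(preds, dim=1)
print(probs[:, 1].shape, labels.shape)                    # both torch.Size([16, 224, 224]), so dice_loss can flatten them consistently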

What would be the best way to rectify this? How can we solve this problem? Please provide a solution.
GitHub - YtongXie/MB-DCNN: [TMI2020] A Mutual Bootstrapping Model for Automated Skin Lesion Segmentation and Classification. I have to implement this whole set of code.
Please help with this.