AssertionError: Traceback (most recent call last):

"D:\Program Files\Anaconda3\envs\lei\envs\py36-pytorch\python.exe" D:/zgy/GCC/GCC-SFCN-master/train.py
Traceback (most recent call last):
  File "D:/zgy/GCC/GCC-SFCN-master/train.py", line 250, in <module>
    main()
  File "D:/zgy/GCC/GCC-SFCN-master/train.py", line 100, in main
    i_tb = train(train_loader, net, optimizer, epoch, i_tb)
  File "D:/zgy/GCC/GCC-SFCN-master/train.py", line 120, in train
    for i, data in enumerate(train_loader, 0):
  File "D:\Program Files\Anaconda3\envs\lei\envs\py36-pytorch\lib\site-packages\torch\utils\data\dataloader.py", line 286, in __next__
    return self._process_next_batch(batch)
  File "D:\Program Files\Anaconda3\envs\lei\envs\py36-pytorch\lib\site-packages\torch\utils\data\dataloader.py", line 307, in _process_next_batch
    raise batch.exc_type(batch.exc_msg)
AssertionError: Traceback (most recent call last):
  File "D:\Program Files\Anaconda3\envs\lei\envs\py36-pytorch\lib\site-packages\torch\utils\data\dataloader.py", line 57, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "D:\Program Files\Anaconda3\envs\lei\envs\py36-pytorch\lib\site-packages\torch\utils\data\dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "D:\zgy\GCC\GCC-SFCN-master\datasets\UCF_QNRF.py", line 40, in __getitem__
    img, den = self.main_transform(img,den)
  File "D:\zgy\GCC\GCC-SFCN-master\misc\transforms.py", line 16, in __call__
    img, mask = t(img, mask)
  File "D:\zgy\GCC\GCC-SFCN-master\misc\transforms.py", line 50, in __call__
    assert img.size == mask.size
AssertionError

Could you post the code for main_transform and t(img, mask)?
Based on the error message this function needs img and mask to have the same size, which is not the case for the current pair.

1 Like

THANK YOU VERY MUCH

import numbers
import random
import numpy as np
from PIL import Image, ImageOps, ImageFilter
from config import cfg
import torch
# ===============================img transforms============================

class Compose(object):
    """Chain several paired transforms into one callable.

    The transforms run in list order, each receiving the previous one's
    output. Called without ``bbx`` every transform is invoked as
    ``t(img, mask)`` and must return a pair; called with ``bbx`` every
    transform is invoked as ``t(img, mask, bbx)`` and must return a triple.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, mask, bbx=None):
        # The calling convention is fixed by the initial bbx argument and does
        # not change even if a transform hands back None for bbx.
        with_boxes = bbx is not None
        for transform in self.transforms:
            if with_boxes:
                img, mask, bbx = transform(img, mask, bbx)
            else:
                img, mask = transform(img, mask)
        if with_boxes:
            return img, mask, bbx
        return img, mask

class RandomHorizontallyFlip(object):
    """Mirror an image/mask pair left-to-right with probability 0.5.

    Returns ``(img, mask)`` when ``bbx`` is None, otherwise
    ``(img, mask, bbx)``. On a flip, ``bbx`` is modified in place: column 1
    becomes ``w - column 3`` and column 3 becomes ``w - column 1``, where
    ``w`` is the image width.
    """

    def __call__(self, img, mask, bbx=None):
        flip = random.random() < 0.5
        has_boxes = bbx is not None

        if flip and has_boxes:
            width, _ = img.size
            # Compute both mirrored columns before writing either back, so the
            # second one is derived from the original values.
            mirrored_lo = width - bbx[:, 3]
            mirrored_hi = width - bbx[:, 1]
            bbx[:, 1] = mirrored_lo
            bbx[:, 3] = mirrored_hi

        if flip:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)

        if has_boxes:
            return img, mask, bbx
        return img, mask

class RandomCrop(object):
    """Cut the same randomly placed window out of an image and its mask.

    ``size`` is either a single number (square window) or a
    ``(height, width)`` pair. With ``padding > 0`` both inputs are first
    given a zero-filled border of that many pixels. Inputs that already
    match the window are returned untouched; inputs smaller than the window
    in either dimension are resized to it instead of cropped.
    """

    def __init__(self, size, padding=0):
        is_scalar = isinstance(size, numbers.Number)
        self.size = (int(size), int(size)) if is_scalar else size
        self.padding = padding

    def __call__(self, img, mask):
        border = self.padding
        if border > 0:
            img = ImageOps.expand(img, border=border, fill=0)
            mask = ImageOps.expand(mask, border=border, fill=0)

        assert img.size == mask.size
        width, height = img.size
        crop_h, crop_w = self.size

        if width == crop_w and height == crop_h:
            return img, mask

        if width < crop_w or height < crop_h:
            target = (crop_w, crop_h)
            return img.resize(target, Image.BILINEAR), mask.resize(target, Image.NEAREST)

        # Draw the horizontal offset first, then the vertical one.
        left = random.randint(0, width - crop_w)
        top = random.randint(0, height - crop_h)
        box = (left, top, left + crop_w, top + crop_h)
        return img.crop(box), mask.crop(box)


class CenterCrop(object):
    """Cut a centred window out of an image and its mask.

    ``size`` is either a single number (square window) or a
    ``(height, width)`` pair. The window is positioned from the image's
    size only; the mask is cropped with the same box.
    """

    def __init__(self, size):
        is_scalar = isinstance(size, numbers.Number)
        self.size = (int(size), int(size)) if is_scalar else size

    def __call__(self, img, mask):
        width, height = img.size
        crop_h, crop_w = self.size
        left = int(round((width - crop_w) / 2.))
        top = int(round((height - crop_h) / 2.))
        box = (left, top, left + crop_w, top + crop_h)
        return img.crop(box), mask.crop(box)



class FreeScale(object):
    """Resize an image/mask pair to a fixed size, ignoring aspect ratio.

    ``size`` is ``(height, width)``. The image is resampled with
    ``Image.BILINEAR`` and the mask with ``Image.NEAREST``.
    """

    def __init__(self, size):
        self.size = size  # (h, w)

    def __call__(self, img, mask):
        height, width = self.size[0], self.size[1]
        target = (width, height)  # resize() is called with (width, height)
        resized_img = img.resize(target, Image.BILINEAR)
        resized_mask = mask.resize(target, Image.NEAREST)
        return resized_img, resized_mask


class ScaleDown(object):
    """Resize a mask to ``size`` divided by ``cfg.TRAIN.DOWNRATE``.

    ``size`` is ``(height, width)``. The mask is resampled with
    ``Image.NEAREST``.
    """

    def __init__(self, size):
        self.size = size  # (h, w)

    def __call__(self, mask):
        rate = cfg.TRAIN.DOWNRATE
        # Under Python 3 a plain `/` yields floats, which Image.resize does not
        # accept as a size. Floor-divide and coerce to int so the target size
        # is always integral (same result as the old `/` on Python 2 ints).
        target_w = int(self.size[1] // rate)
        target_h = int(self.size[0] // rate)
        return mask.resize((target_w, target_h), Image.NEAREST)


class Scale(object):
    """Resize an image/mask pair so the shorter side equals ``size``.

    The aspect ratio is kept (the longer side is truncated to an int). If
    the shorter side already equals ``size`` the inputs are returned
    unchanged. The image is resampled with ``Image.BILINEAR`` and the mask
    with ``Image.NEAREST``.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, img, mask):
        # Print both sizes before the assertion fires so a mismatch is visible
        # in the output.
        if img.size != mask.size:
            for item in (img, mask):
                print(item.size)
        assert img.size == mask.size

        width, height = img.size
        short_side = self.size

        width_is_short = width <= height and width == short_side
        height_is_short = height <= width and height == short_side
        if width_is_short or height_is_short:
            return img, mask

        if width < height:
            target = (short_side, int(short_side * height / width))
        else:
            target = (int(short_side * width / height), short_side)
        return img.resize(target, Image.BILINEAR), mask.resize(target, Image.NEAREST)


class RandomRotate(object):
    """Rotate an image/label pair by a random integer angle.

    The angle is drawn uniformly from ``[-angle, angle]`` (inclusive). Each
    input is padded with ``pad_image``, rotated, and cropped to the box
    ``(w, h, 2w, 2h)``, so the output has the input's size. The image uses
    'reflection' padding and bilinear resampling; the label uses 'constant'
    padding with value 0 and nearest-neighbour resampling.

    NOTE(review): ``pad_image`` is not defined in the visible part of this
    file. The crop box suggests it pads by ``h`` top/bottom and ``w``
    left/right — confirm against its definition.
    """

    def __init__(self, angle):
        self.angle = angle

    def __call__(self, image, label):
        """Return the rotated ``(image, label)``; ``label`` may be None."""
        assert label is None or image.size == label.size

        w, h = image.size
        angle = random.randint(0, self.angle * 2) - self.angle
        # Region of the padded canvas occupied by the original image.
        crop_box = (w, h, w + w, h + h)

        # The assertion above explicitly allows label=None, so skip the label
        # branch in that case instead of dereferencing None.
        if label is not None:
            label = pad_image('constant', label, h, h, w, w, value=0)
            label = label.rotate(angle, resample=Image.NEAREST)
            label = label.crop(crop_box)

        image = pad_image('reflection', image, h, h, w, w)
        image = image.rotate(angle, resample=Image.BILINEAR)
        image = image.crop(crop_box)
        return image, label


class RoadRegionCrop(object):
    """Keep only the lower part of an image/mask pair.

    Everything above ``round(h * road_rate_h)`` is dropped; the full width
    is kept. The cut line is computed from the image's size and the same
    box is applied to the mask.
    """

    def __init__(self, road_rate_h):
        self.road_rate_h = road_rate_h

    def __call__(self, img, mask):
        width, height = img.size
        top = int(round(height * self.road_rate_h))
        box = (0, top, width, height)
        return img.crop(box), mask.crop(box)


# ===============================label transforms============================

class DeNormalize(object):
    """Undo a per-channel normalisation: ``channel * std + mean``.

    The tensor is iterated along its first dimension and paired with the
    entries of ``mean`` and ``std``. Each slice is updated through
    ``mul_``/``add_``, and the same tensor object is returned.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        stats = zip(self.mean, self.std)
        for channel, (mean, std) in zip(tensor, stats):
            channel.mul_(std)
            channel.add_(mean)
        return tensor


class MaskToTensor(object):
    """Convert an array-like mask into a torch tensor via ``.long()``.

    The input is first copied into an ``int32`` NumPy array.
    """

    def __call__(self, img):
        as_int32 = np.array(img, dtype=np.int32)
        tensor = torch.from_numpy(as_int32)
        return tensor.long()


class LabelNormalize(object):
    """Convert an array-like label to a torch tensor scaled by ``para``."""

    def __init__(self, para):
        self.para = para

    def __call__(self, tensor):
        as_array = np.array(tensor)
        return torch.from_numpy(as_array) * self.para

class GTScaleDown(object):
    """Shrink a ground-truth map by ``factor`` along each side.

    The map is resized to ``(w // factor, h // factor)`` with bicubic
    resampling and every value is multiplied by ``factor * factor``
    (presumably to keep the sum of a density map roughly unchanged — confirm
    with the training code). With ``factor == 1`` the input is returned
    as is.
    """

    def __init__(self, factor=8):
        self.factor = factor

    def __call__(self, img):
        w, h = img.size
        if self.factor == 1:
            return img
        # Floor division: under Python 3 `w / factor` is a float, which
        # Image.resize does not accept as a size.
        target = (w // self.factor, h // self.factor)
        scaled = np.array(img.resize(target, Image.BICUBIC)) * self.factor * self.factor
        return Image.fromarray(scaled)

Thanks for the code!
As said before, your transformations (e.g. RandomCrop) need an image and the corresponding mask in the same size, which is not the case.
Are the images and masks stored in different resolutions or are you resizing (one of) them?

1 Like

Sorry, I don’t understand what the mask means. Could you give me a brief explanation? Thank you.

It looks like you are dealing with e.g. a segmentation task, since you are loading an image and a mask.
Is this correct?
If so, you should make sure both inputs have the same size as your transformations won’t work otherwise.

1 Like

This is code I downloaded from GitHub for crowd counting; the error appeared after I replaced the author’s dataset with mine. I think my pictures must be different from the author’s pictures, but I don’t know how to modify the code. Thank you very much.

Could you explain your input data a bit?
I.e. what kind of input images and masks are you using?
Somewhere in your code you are most likely using a Dataset, which loads the image and the corresponding mask. Are you assuming the mask size is different than the image size?
If so, would resizing work?

1 Like

My dataset includes images and labels. Each image is a crowd map, and its label is the coordinates of the center point of each person’s head in the image. Does “mask” mean the label?

The code base you are using assumes that you are providing an image mask, not coordinates.
If you don’t have this type of mask, I would recommend to write a custom Dataset, loading the input images and the corresponding head positions.

1 Like

Ok, I will try your method, thank you very very much.