Problem with tensor sizes?

I have two generators that output images:
self.syn_skin = self.G(self.image)
self.syn_hair = self.G(self.image1)
I then apply an overlay function to combine them into new images.

def overlay_images(hair_images, skin_images):
    #os.mkdir('/kaggle/working/Ovelayed_Images/')
    #path = "/kaggle/working/Ovelayed_Images/"
    output = []
    # one overlay per (hair, skin) pair, so the output batch size is
    # len(hair_images) * len(skin_images)
    for i, syn_hair in enumerate(hair_images):
        for j, syn_skin in enumerate(skin_images):
            img_syn_skin = skin_images.data[i]
            img_syn_hair = hair_images.data[j]
            img_syn_skin = Tensor2Image(img_syn_skin)
            img_syn_hair = Tensor2Image(img_syn_hair)
            #has_transparency = im_has_alpha(np.array(img_syn_skin))
            #img_syn_skin = img_syn_skin.convert('RGB')
            #img_syn_hair = img_syn_hair.convert('RGB')
            img_syn_hair_f = img_syn_hair.copy()
            img_syn_hair_f.paste(img_syn_skin)   # paste the skin image onto a copy of the hair image
            img = transforms.ToTensor()(img_syn_hair_f)
            output.append(img)
    out = torch.stack(output)
    return out

This function is applied as follows:
self.overlay_realimage = overlay_images(self.hair, self.skin)
self.overlay_fakeimage = overlay_images(self.syn_hair, self.syn_skin)

where self.hair and self.skin are the ground-truth images and self.syn_hair, self.syn_skin are the images generated by the generators.
I want to compute the MSE loss. When I do that:
self.L2 = nn.MSELoss()
l3 = self.L2(self.overlay_realimage, self.overlay_fakeimage)

I get the following error:

RuntimeError: The size of tensor a (3) must match the size of tensor b (4) at non-singleton dimension 0

The size of each of the four tensors self.hair, self.skin, self.syn_hair and self.syn_skin is [1, 3, 256, 256], where 1 is the batch size.

When I print the sizes of self.overlay_realimage and self.overlay_fakeimage, I get
torch.Size([1, 3, 256, 256])
for the real images and
torch.Size([2, 3, 256, 256])
for the fake images.

Both are passed through the same overlay function, so how can the results be different? I couldn't figure it out!
Thanks in advance
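
For reference, nn.MSELoss expects the input and target tensors to have matching (or at least broadcast-compatible) shapes, so a batch-size mismatch like this produces exactly such an error. A minimal sketch with made-up batch sizes of 3 and 4:

import torch
import torch.nn as nn

a = torch.rand(3, 3, 256, 256)   # e.g. 3 overlaid images
b = torch.rand(4, 3, 256, 256)   # e.g. 4 overlaid images

loss_fn = nn.MSELoss()
loss = loss_fn(a, b)
# raises a RuntimeError because sizes 3 and 4 cannot be broadcast
# against each other at dimension 0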

Could you post a short, runnable example (e.g., with dummy input data) that reproduces the issue? From here it's a bit hard to see what self.G produces, etc.

This is an example:

import torch
import torch.nn as nn

image1 = torch.rand(1, 3, 256, 256)
image2 = torch.rand(1, 3, 256, 256)
out = overlay_images(image1, image2)
groundtruthimage = torch.rand(1, 3, 256, 256)
print(image1.size())
print(image2.size())
print(out.size())
print(type(out))
loss = nn.MSELoss()
loss = loss(out, groundtruthimage)
print(loss)

The problem is in out, computed using overlay_images, which produces a tensor of size torch.Size([6, 3, 256, 256]).

Could you provide a version of the function that works with this example? (E.g., overlay_images doesn't seem to work on raw tensors here, since it requires Tensor2Image, which isn't defined:)

import torch
import torch.nn as nn

image1 = torch.rand(1, 3, 256, 256)
image2 = torch.rand(1, 3, 256, 256)
out = overlay_images(image1, image2)
groundtruthimage = torch.rand(1, 3, 256, 256)
print(image1.size())
print(image2.size())
print(out.size())
print(type(out))
loss = nn.MSELoss()
loss = loss(out, groundtruthimage)
print(loss)

yields
NameError: name 'Tensor2Image' is not defined

This is the function:

from torchvision import transforms

def Tensor2Image(img):
    """
    input (FloatTensor)
    output (PIL.Image)
    """
    img = img.cpu()                     # move the tensor back to CPU memory
    img = (img * 0.5) + 0.5             # undo the [-1, 1] normalization back to [0, 1]
    img = transforms.ToPILImage()(img)
    #img = img.convert('RGB')
    return img
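
For completeness, a minimal sketch of how this helper round-trips a tensor, assuming a single CHW image normalized to [-1, 1] (dummy data, not from the actual model):

import torch
from torchvision import transforms

t = torch.rand(3, 256, 256) * 2 - 1    # dummy CHW image in [-1, 1]
pil_img = Tensor2Image(t)              # PIL.Image, size 256x256
back = transforms.ToTensor()(pil_img)  # back to a [0, 1] CHW float tensor
print(pil_img.size, back.shape)        # (256, 256) torch.Size([3, 256, 256])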

I couldn’t reproduce the issue with a slightly modified version of the above code:

import torch

from torchvision import transforms
from PIL import Image

def Tensor2Image(img):
    """
    input (FloatTensor)
    output (PIL.Image)
    """
    img = img.cpu()                     # move the tensor back to CPU memory
    img = (img * 0.5) + 0.5             # undo the [-1, 1] normalization back to [0, 1]
    img = transforms.ToPILImage()(img)
    #img = img.convert('RGB')
    return img

def overlay_images(hair_images, skin_images):
    #os.mkdir('/kaggle/working/Ovelayed_Images/')
    #path = "/kaggle/working/Ovelayed_Images/"
    output = []
    for i, syn_hair in enumerate(hair_images):
        for j, syn_skin in enumerate(skin_images):
            img_syn_skin = skin_images.data[i]
            img_syn_hair = hair_images.data[j]
            img_syn_skin = Tensor2Image(img_syn_skin)
            img_syn_hair = Tensor2Image(img_syn_hair)
            #has_transparency = im_has_alpha(np.array(img_syn_skin))
            #img_syn_skin = img_syn_skin.convert('RGB')
            #img_syn_hair = img_syn_hair.convert('RGB')
            img_syn_hair_f = img_syn_hair.copy()
            img_syn_hair_f.paste(img_syn_skin)   # paste the skin image onto a copy of the hair image
            img = transforms.ToTensor()(img_syn_hair_f)
            output.append(img)
    out = torch.stack(output)
    return out

image1 = torch.rand(1, 3, 256, 256)
image2 = torch.rand(1, 3, 256, 256)
out = overlay_images(image1, image2)
groundtruthimage = torch.rand(1, 3, 256, 256)
print(image1.size())
print(image2.size())
print(out.size())
print(type(out))
loss = torch.nn.MSELoss()
loss = loss(out, groundtruthimage)
print(loss)

yields

torch.Size([1, 3, 256, 256])
torch.Size([1, 3, 256, 256])
torch.Size([1, 3, 256, 256])
<class 'torch.Tensor'>
tensor(0.1649)

It might be worthwhile to check what is not matching between the original code and the repro code.
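
One thing worth checking: the nested loops append one image per (hair, skin) pair, so the output batch size is len(hair_images) * len(skin_images). If one of the generators returns a different batch size than the ground-truth tensors, the two overlay_images calls will produce differently sized outputs. A minimal shape-only sketch (dummy tensors, no PIL overlay):

import torch

def overlay_shapes(hair_images, skin_images):
    # mirrors the loop structure of overlay_images, tracking only shapes
    output = []
    for _ in hair_images:
        for _ in skin_images:
            output.append(torch.rand(3, 256, 256))  # stand-in for one overlay
    return torch.stack(output)

print(overlay_shapes(torch.rand(1, 3, 256, 256), torch.rand(1, 3, 256, 256)).shape)
# torch.Size([1, 3, 256, 256])
print(overlay_shapes(torch.rand(2, 3, 256, 256), torch.rand(1, 3, 256, 256)).shape)
# torch.Size([2, 3, 256, 256])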


I tested your code. It is working perfectly. I will compare it with mine.
Thanks a lot.