Target size is different from the input size

I’m trying to train an ACGAN on CIFAR-10. However, I get the following traceback:

Traceback (most recent call last):
  File "/.../cifar_acgan_3.py", line 251, in <module>
    bce_fake_loss = bce_criterion(b_disc,fake_labels)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1186, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 618, in forward
    return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 3080, in binary_cross_entropy
    raise ValueError(
ValueError: Using a target size (torch.Size([100, 1])) that is different to the input size (torch.Size([25, 1])) is deprecated. Please ensure they have the same size.

When training with an input dimension of 32x32 (in_h = in_w = 32), everything works fine. However, I would like the input (samples for D) and the output (generated images of G) to be of the size 64x64 (in_h = in_w = 64). I’m not sure how to accomplish this. Thankful for any help!

This is the code:

import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.utils as vutils

import torch.optim as optim
import torch.nn as nn

import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np
import random
import math

import os

batch_size = 100
epochs = 50
latent_dim = 100
class_dim = 10
gf_dim = 96
df_dim = 16
in_w = in_h = 64
c_dim = 3

device = 'cpu'

manualSeed = 3734
print("Random Seed: ",manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

transform = transforms.Compose([transforms.Resize((in_h,in_w)), transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,)),])

def transform_inverse (y):
    (...)

def batch_transform_inverse(y):
    (...)

train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set,batch_size=batch_size, shuffle =True)

test_set = torchvision.datasets.CIFAR10 (root='./data',train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle = False)

train_iter = iter(train_loader)
test_iter = iter(test_loader)

real_batch, _ = next(iter(train_loader))

def conv_bn_layer(in_channels,out_channels,kernel_size,stride=1,padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding),
        nn.BatchNorm2d(out_channels,momentum=0.1,eps=1e-5),)

def tconv_bn_layer(in_channels,out_channels,kernel_size,stride=1,padding=0,output_padding=0):
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding,output_padding=output_padding),
        nn.BatchNorm2d(out_channels,momentum=0.1,eps=1e-5),)

def tconv_layer(in_channels,out_channels,kernel_size,stride=1,padding=0,output_padding=0):
    return nn.ConvTranspose2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding,output_padding=output_padding)

def conv_layer(in_channels,out_channels,kernel_size,stride=1,padding=0):
    return nn.Conv2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding)

def fc_layer(in_features,out_features):
    return nn.Linear(in_features,out_features)

def fc_bn_layer(in_features,out_features):
    return nn.Sequential(
        nn.Linear(in_features,out_features),
        nn.BatchNorm1d(out_features))

def conv_out_size_same(size, stride):
    return int(math.ceil(float(size) / float(stride)))

s_h, s_w = in_h, in_w
s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)

class Generator(nn.Module):
    def __init__(self):
        super(Generator,self).__init__()
        self.fc_layer1 = fc_layer(latent_dim+class_dim,gf_dim*8)
        self.up_sample_layer2 = tconv_bn_layer(gf_dim*8,gf_dim*4,4,2,0)
        self.up_sample_layer3 = tconv_bn_layer(gf_dim*4,gf_dim*2,4,2,1)
        self.up_sample_layer4 = tconv_bn_layer(gf_dim*2,gf_dim,4,2,1)
        self.up_sample_layer5 = tconv_layer(gf_dim,c_dim,4,2,1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = F.relu(self.fc_layer1(x)).view(-1,gf_dim*8,1,1)
        x = F.relu(self.up_sample_layer2(x))
        x = F.relu(self.up_sample_layer3(x))
        x = F.relu(self.up_sample_layer4(x))
        x = self.up_sample_layer5(x)
        return self.tanh(x)

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator,self).__init__()
        self.conv_layer1 = conv_layer(c_dim,df_dim,3,2,1)
        self.conv_layer2 = conv_bn_layer(df_dim,df_dim*2,3,1,1)
        self.conv_layer3 = conv_bn_layer(df_dim*2,df_dim*4,3,2,1)
        self.conv_layer4 = conv_bn_layer(df_dim*4,df_dim*8,3,1,1)
        self.conv_layer5 = conv_bn_layer(df_dim*8,df_dim*16,3,2,1)
        self.conv_layer6 = conv_bn_layer(df_dim*16,df_dim*32,3,1,1)
        self.aux_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,class_dim) # cls
        self.dis_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,1) # fake/real
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        x = F.leaky_relu(self.conv_layer1(x),0.2)
        x = F.leaky_relu(self.conv_layer2(x),0.2)
        x = F.leaky_relu(self.conv_layer3(x),0.2)
        x = F.leaky_relu(self.conv_layer4(x),0.2)
        x = F.leaky_relu(self.conv_layer5(x),0.2)
        x = F.leaky_relu(self.conv_layer6(x),0.2)
        x = x.view(-1,df_dim*32*s_w8*s_h8)
        aux = self.aux_fc_layer7(x)
        disc = self.dis_fc_layer7(x)

        return self.softmax(aux), self.sigmoid(disc)

def weights_init(m):
    (...)

G = Generator().to(device)
D = Discriminator().to(device)

bce_criterion = nn.BCELoss()
nll_criterion = nn.NLLLoss()

G_optimizer = optim.Adam(G.parameters(), lr=5e-4,betas=(0.5,0.999))
D_optimizer = optim.Adam(D.parameters(), lr=2e-4,betas=(0.5,0.999))

fixed_latent = torch.randn(100,latent_dim,device=device)
fixed_labels = torch.zeros(100,class_dim,device=device)

for j in range(10):
    for i in range(class_dim):
        fixed_labels[i*10+j][i]=1

fixed_noise=torch.cat((fixed_latent,fixed_labels),1)

with torch.no_grad():
    fake_batch=G(fixed_noise)

def compute_cls_acc(m_disc,cls_labels):
  return ((m_disc.argmax(dim=1) == cls_labels)*1.0).sum()/100

iter_per_plot = 10
plot_per_eps=(int(len(train_loader)/iter_per_plot))
transform_PIL=transforms.ToPILImage()

for ep  in range(epochs):
    for i, (data, cls_labels) in enumerate(train_loader):
        b_size=data.shape[0]

        data = data.to(device)
        cls_labels = cls_labels.to(device)

        cls_one_hot=torch.zeros(b_size,class_dim,device=device)
        cls_one_hot[torch.arange(b_size), cls_labels] = 1.0

        real_labels = torch.ones(b_size,1).to(device)
        fake_labels = torch.zeros(b_size,1).to(device)

        # with real
        # Train D
        D.zero_grad()
        _ , real_score = m_disc, b_disc = D(data)
        bce_real_loss = bce_criterion(b_disc,real_labels)
        cls_real_loss = nll_criterion(m_disc,cls_labels)

        real_cls_acc=compute_cls_acc(m_disc,cls_labels)#for logging

        # with fake
        latent_z = torch.randn(b_size,latent_dim).to(device)
        latent_c = cls_one_hot
        latent = torch.cat((latent_z,latent_c),dim=1)

        fake = G(latent)
        _, fake_score = m_disc, b_disc = D(fake.detach())
        bce_fake_loss = bce_criterion(b_disc,fake_labels)
        cls_fake_loss = nll_criterion(m_disc,cls_labels)

        fake_cls_acc=compute_cls_acc(m_disc,cls_labels)#for logging

        D_Ls = bce_real_loss + bce_fake_loss
        D_Lc = 1.8*cls_real_loss + 0.2*cls_fake_loss # for learning stability
        loss_D = D_Ls + D_Lc

        loss_D.backward()
        D_optimizer.step()

        #Train G
        G.zero_grad()

        m_disc, b_disc = D(fake)
        G_Ls = bce_criterion(b_disc,real_labels)
        G_Lc = nll_criterion(m_disc,cls_labels)
        loss_G = G_Ls + G_Lc
        loss_G.backward()
        G_optimizer.step()

        if (i+1)%iter_per_plot == 0 :
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}'.format(ep, epochs, i+1, len(train_loader), loss_D.item(), loss_G.item()))

This view operation might be wrong:

x = F.relu(self.fc_layer1(x)).view(-1,gf_dim*8,1,1)

as it could change the batch size instead of flattening the feature dimensions.
Use x = x.view(x.size(0), -1) to keep the batch size equal and fix potential shape mismatch errors raised in other layers afterwards.

Thanks for your help! However, this leads to the following error. It does not seem to me like a shape mismatch in the subsequent layers (or am I wrong?).

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv_transpose2d, but got input of size: [100, 768]

This was the change I made:

def forward(self, x):
        ####
		x = F.relu(self.fc_layer1(x))
		x = x.view(x.size(0), -1)
        ####
		x = F.relu(self.up_sample_layer2(x))
		x = F.relu(self.up_sample_layer3(x))
		x = F.relu(self.up_sample_layer4(x))
		x = self.up_sample_layer5(x)
		return self.tanh(x)

Sorry, I should have pointed out that my code is to flatten the tensor into a 2D tensor. For your use case, unsqueeze the two additional spatial dimensions via:

x = x.view(x.size(0), -1, 1, 1)

Hmm this not seem to resolve the issue, I get the same traceback as before:

ValueError: Using a target size (torch.Size([100, 1])) that is different to the input size (torch.Size([25, 1])) is deprecated. Please ensure they have the same size.

This is how the sizes change:

def forward(self, x):
		print(x.size())
		x = F.relu(self.fc_layer1(x))
		print(x.size())
		x = x.view(x.size(0), -1, 1, 1)
		print(x.size())
torch.Size([100, 110])
torch.Size([100, 768])
torch.Size([100, 768, 1, 1])
torch.Size([100, 110])
torch.Size([100, 768])
torch.Size([100, 768, 1, 1])

It seems that it is able to calculate the first layer two times before breaking.

Based on your output it seems that the model output is correct and now keeps the batch size equal, since all activations have a shape of [batch_size=100, ...]. However, the target has only 25 values so you would need to check why that’s the case.

okay, I realised that the output of G is still of size the size 32x32. This could be the source of the problem.

class Generator(nn.Module):
	def __init__(self):
		super(Generator,self).__init__()
		self.fc_layer1 = fc_layer(latent_dim+class_dim,gf_dim*8)
		self.up_sample_layer2 = tconv_bn_layer(gf_dim*8,gf_dim*4,4,2,0)
		self.up_sample_layer3 = tconv_bn_layer(gf_dim*4,gf_dim*2,4,2,1)
		self.up_sample_layer4 = tconv_bn_layer(gf_dim*2,gf_dim,4,2,1)
		self.up_sample_layer5 = tconv_layer(gf_dim,c_dim,4,2,1)
		self.tanh = nn.Tanh()

	def forward(self, x):
		# print(x.size())
		# x = F.relu(self.fc_layer1(x)).view(-1,gf_dim*8,1,1)
		x = F.relu(self.fc_layer1(x))
		# print(x.size())
		x = x.view(x.size(0), -1, 1, 1)
		# x = x.view(x.size(0), -1)
		# print(x.size())
		x = F.relu(self.up_sample_layer2(x))
		x = F.relu(self.up_sample_layer3(x))
		x = F.relu(self.up_sample_layer4(x))
		print(x.size())
		x = self.up_sample_layer5(x)
		print(x.size())
		return self.tanh(x)

this prints out

torch.Size([100, 96, 16, 16])
torch.Size([100, 3, 32, 32])

How would I have to change the layer dimensions in order to achieve an output 64x64?
Thanks alot for your help so far. And sorry, I’m still quite new to Pytorch.

The number of print statements doesn’t match the posted shapes, but I also don’t think the Generator is causing the issues, since you’ve already fixed it. Did you also fix the Discriminator as it has the same issue in x = x.view(-1,df_dim*32*s_w8*s_h8)?

Ah, sorry, I posted the last two shapes, before and after x = self.up_sample_layer5(x).

When changing the Discriminator to

class Discriminator(nn.Module):
	def __init__(self):
		super(Discriminator,self).__init__()
		self.conv_layer1 = conv_layer(c_dim,df_dim,3,2,1)
		self.conv_layer2 = conv_bn_layer(df_dim,df_dim*2,3,1,1)
		self.conv_layer3 = conv_bn_layer(df_dim*2,df_dim*4,3,2,1)
		self.conv_layer4 = conv_bn_layer(df_dim*4,df_dim*8,3,1,1)
		self.conv_layer5 = conv_bn_layer(df_dim*8,df_dim*16,3,2,1)
		self.conv_layer6 = conv_bn_layer(df_dim*16,df_dim*32,3,1,1)
		self.aux_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,class_dim) # cls
		self.dis_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,1) # fake/real
		self.sigmoid = nn.Sigmoid()
		self.softmax = nn.Softmax(dim=-1)

	def forward(self, x):
		x = F.leaky_relu(self.conv_layer1(x),0.2)
		x = F.leaky_relu(self.conv_layer2(x),0.2)
		x = F.leaky_relu(self.conv_layer3(x),0.2)
		x = F.leaky_relu(self.conv_layer4(x),0.2)
		x = F.leaky_relu(self.conv_layer5(x),0.2)
		x = F.leaky_relu(self.conv_layer6(x),0.2)
		# x = x.view(-1,df_dim*32*s_w8*s_h8)
		x = x.view(x.size(0), -1)
		aux = self.aux_fc_layer7(x)
		disc = self.dis_fc_layer7(x)

		return self.softmax(aux), self.sigmoid(disc)

I get

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x16384 and 65536x10)

¯_(ツ)_/¯

That’s good as we are getting closer. Set the in_features of the last two linear layer which are accepting the flattened x tensor to 16384 and it should work.

nice! but not there yet :frowning:

This is the change I made:

class Discriminator(nn.Module):
	def __init__(self):
		super(Discriminator,self).__init__()
		self.conv_layer1 = conv_layer(c_dim,df_dim,3,2,1)
		self.conv_layer2 = conv_bn_layer(df_dim,df_dim*2,3,1,1)
		self.conv_layer3 = conv_bn_layer(df_dim*2,df_dim*4,3,2,1)
		self.conv_layer4 = conv_bn_layer(df_dim*4,df_dim*8,3,1,1)
		self.conv_layer5 = conv_bn_layer(df_dim*8,df_dim*16,3,2,1)
		self.conv_layer6 = conv_bn_layer(df_dim*16,df_dim*32,3,1,1)
		# self.aux_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,class_dim) # cls
		# self.dis_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,1) # fake/real
		self.aux_fc_layer7 = fc_layer(16384,class_dim) # cls
		self.dis_fc_layer7 = fc_layer(16384,1) # fake/real
		self.sigmoid = nn.Sigmoid()
		self.softmax = nn.Softmax(dim=-1)

	def forward(self, x):
		x = F.leaky_relu(self.conv_layer1(x),0.2)
		x = F.leaky_relu(self.conv_layer2(x),0.2)
		x = F.leaky_relu(self.conv_layer3(x),0.2)
		x = F.leaky_relu(self.conv_layer4(x),0.2)
		x = F.leaky_relu(self.conv_layer5(x),0.2)
		x = F.leaky_relu(self.conv_layer6(x),0.2)
		# x = x.view(-1,df_dim*32*s_w8*s_h8)
		x = x.view(x.size(0), -1)
		aux = self.aux_fc_layer7(x)
		disc = self.dis_fc_layer7(x)

		return self.softmax(aux), self.sigmoid(disc)

I get:

Traceback (most recent call last):
  File "/Users/.../cifar_acgan_3.py", line 235, in <module>
    _ , real_score = m_disc, b_disc = D(data)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1186, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/l.../cifar_acgan_3.py", line 160, in forward
    aux = self.aux_fc_layer7(x)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1186, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x65536 and 16384x10)

Are you using variable input shapes? The previous error was given exactly the opposite shapes:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x16384 and 65536x10)

which is why I suggested to set the in_features to 16384. However, now your activation feature dimension seems to have a size of 65536 based on the error message.
If your input do not have a static shape use an adaptive pooling layer before flattening the activation to create a defined activation shape which will allow you to pass the flattened activation to the linear layer.

1 Like