PyTorch Layer Dimension Debugging (Completely New To PT)

Hello all, I am currently working on my first-ever implementation of a GAN. This is not a DCGAN, just a simple GAN with linear layers. I keep getting an error about the dimensions of my tensors being off, and I can’t find a way to debug it. The exact error message is:

“stack expects each tensor to be equal size, but got [3, 915, 784] at entry 0 and [3, 925, 784] at entry 1”

I tried to print out the shape of the layers, but I'm getting an error saying that there is no size or shape method associated with layers. Can someone help me figure out how to debug things in PyTorch? I've written everything, but getting it to train is hard because I keep running into dimension issues. Here is my code:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

class Discriminator(nn.Module):
    def __init__(self, img_dim):
        super().__init__()
        self.disc = nn.Sequential(
            nn.Linear(img_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.disc(x)

class Generator(nn.Module):
    def __init__(self, z_dim, img_dim):
        super().__init__()
        self.gen = nn.Sequential(
            nn.Linear(z_dim, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, img_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.gen(x)

device = "cuda" if torch.cuda.is_available() else "cpu"

img_dim = 28 * 28 * 1
z_dim = 32
learning_rate = 1e-4
epochs = 100
batch_size = 20
fixed_noise = torch.randn(batch_size, z_dim).to(device=device)

Disc = Discriminator(img_dim = img_dim).to(device=device)
Gen = Generator(z_dim = z_dim, img_dim = img_dim).to(device=device)
disc_optim = optim.Adam(Disc.parameters(), lr=learning_rate)
gen_optim = optim.Adam(Gen.parameters(), lr=learning_rate)
criterion = nn.BCELoss()


transformations = transforms.Compose([transforms.Resize(img_dim), transforms.ToTensor()])
dataset = BrainTumorsDataset(csv_file='BrainTumorPaths.csv', root_dir='glioma_tumor', transform=transformations)

data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

for i in range(epochs):
    for batch_idx, (real, _) in enumerate(data_loader):
        real = real.view(-1, 784).to(device=device)
        batch_size = real.shape[0]


        #Train Discriminator -> maximize Loss FN:  log(D(real)) + log(1-D(G(z)))
        z = torch.randn(batch_size, z_dim) #generate random noise

        fake = Gen(z)  # get a fake image

        disc_real = Disc(real).view(-1) #compute log(D(real))
        disc_real_loss = criterion(disc_real, torch.ones_like(disc_real))

        disc_fake = Disc(fake).view(-1) #compute log(1-D(G(z)))
        disc_fake_loss = criterion(disc_fake, torch.zeros_like(disc_fake))

        total_disc_loss = (disc_real_loss + disc_fake_loss)/2

        Disc.zero_grad()
        total_disc_loss.backward(retain_graph=True)
        disc_optim.step()

    
        #Train Generator -> maximize Loss FN: log(D(G(z)))

        gen_output = Disc(fake).view(-1)
        gen_loss = criterion(gen_output, torch.ones_like(gen_output))

        Gen.zero_grad()
        gen_loss.backward()
        gen_optim.step()


        if batch_idx == 0:
            print(
                f'Epoch: {i}/{epochs}, '
                f'Loss Discriminator: {total_disc_loss:.4f}, '
                f'Loss Generator: {gen_loss:.4f}'
            )


Your posted model doesn’t raise the shape mismatch error, as seen in this code snippet, which feeds random inputs with the defined shapes to it:

for i in range(epochs):
    real = torch.randn(batch_size, img_dim)
    real = real.view(-1, 784).to(device=device)
    batch_size = real.shape[0]


    #Train Discriminator -> maximize Loss FN:  log(D(real)) + log(1-D(G(z)))
    z = torch.randn(batch_size, z_dim) #generate random noise

    fake = Gen(z)  # get a fake image

    disc_real = Disc(real).view(batch_size, -1) #compute log(D(real))
    disc_real_loss = criterion(disc_real, torch.ones_like(disc_real))

    disc_fake = Disc(fake).view(-1) #compute log(1-D(G(z)))
    disc_fake_loss = criterion(disc_fake, torch.zeros_like(disc_fake))

    total_disc_loss = (disc_real_loss + disc_fake_loss)/2

    Disc.zero_grad()
    total_disc_loss.backward(retain_graph=True)
    disc_optim.step()

    #Train Generator -> maximize Loss FN: log(D(G(z)))

    gen_output = Disc(fake).view(-1)
    gen_loss = criterion(gen_output, torch.ones_like(gen_output))

    Gen.zero_grad()
    gen_loss.backward()
    gen_optim.step()

    print(
        f'Epoch: {i}/{epochs}, '
        f'Loss Discriminator: {total_disc_loss:.4f}, '
        f'Loss Generator: {gen_loss:.4f}'
    )
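As a side note on the shape printing: modules such as nn.Linear don’t have a .shape or .size() method; only the tensors flowing through them do. A minimal sketch of inspecting intermediate shapes with forward hooks (using a throwaway nn.Sequential, not your exact model):

import torch
import torch.nn as nn

# Modules don't carry a shape; print the shapes of the tensors passing through them.
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.LeakyReLU(0.2),
    nn.Linear(256, 1),
    nn.Sigmoid(),
)

# Register a forward hook on each layer to print its output shape.
for name, module in model.named_children():
    module.register_forward_hook(
        lambda mod, inp, out, name=name: print(name, tuple(out.shape))
    )

x = torch.randn(20, 784)  # fake batch with the expected input shape
model(x)
# 0 (20, 256)
# 1 (20, 256)
# 2 (20, 1)
# 3 (20, 1)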

Based on the shapes mentioned in the error message, I guess the DataLoader is raising the error in its collate_fn while trying to create a batch from the samples drawn from the Dataset.
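You can reproduce the exact error outside of the DataLoader, since the default collate_fn essentially calls torch.stack on the drawn samples, which requires identical shapes. A small sketch using the sizes from your error message:

import torch

# two samples whose resized heights differ, as in the error message
sample_0 = torch.randn(3, 915, 784)
sample_1 = torch.randn(3, 925, 784)

torch.stack([sample_0, sample_1])
# RuntimeError: stack expects each tensor to be equal size,
# but got [3, 915, 784] at entry 0 and [3, 925, 784] at entry 1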
Based on the transformations you are using, I guess that your input images are not square, so you would need to pass the size as a tuple to the Resize transformation. Otherwise the resized images will keep their aspect ratio, as described in the docs:

size (sequence or int) –
Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
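This also matches your error message: with Resize(img_dim) and img_dim = 784, the smaller edge of every image is resized to 784 while the other edge depends on the original aspect ratio (915 vs. 925). A quick sketch of the difference, using a made-up non-square PIL image:

from PIL import Image
from torchvision import transforms

img = Image.new("RGB", (512, 600))  # hypothetical non-square image, width=512, height=600

print(transforms.ToTensor()(transforms.Resize(28)(img)).shape)
# e.g. torch.Size([3, 32, 28]) -> aspect ratio kept, only the smaller edge becomes 28
print(transforms.ToTensor()(transforms.Resize((28, 28))(img)).shape)
# torch.Size([3, 28, 28]) -> fixed (height, width)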

So how shall I get the image size? Is it in terms of (channels, height, width)?

Specify the size as a tuple of (height, width):

transforms.Resize((img_dim, img_dim))
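The (channels, height, width) ordering is what you will see on the tensor after ToTensor(); Resize itself only takes the spatial size (height, width). Also note that in your script img_dim = 28 * 28 = 784 is the flattened feature size, so Resize((img_dim, img_dim)) would create 784x784 images. If the goal is 28x28 inputs for the Linear(784, ...) layers, a sketch of the pipeline could look like this (the Grayscale step is a guess, since the error message shows 3-channel images while img_dim assumes a single channel):

transformations = transforms.Compose([
    transforms.Grayscale(),        # 3-channel images -> 1 channel, so one row per image after flattening
    transforms.Resize((28, 28)),   # fixed (height, width)
    transforms.ToTensor(),         # -> tensor of shape [1, 28, 28]
])
# real.view(-1, 28 * 28) then yields [batch_size, 784], matching Linear(784, ...)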