I’m trying to train an ACGAN on CIFAR-10. However, I get the following traceback:
Traceback (most recent call last):
File "/.../cifar_acgan_3.py", line 251, in <module>
bce_fake_loss = bce_criterion(b_disc,fake_labels)
File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1186, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 618, in forward
return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
File "/Users/.../opt/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 3080, in binary_cross_entropy
raise ValueError(
ValueError: Using a target size (torch.Size([100, 1])) that is different to the input size (torch.Size([25, 1])) is deprecated. Please ensure they have the same size.
When training with an input dimension of 32x32 (in_h = in_w = 32), everything works fine. However, I would like the input (samples for D) and the output (generated images of G) to be of the size 64x64 (in_h = in_w = 64). I’m not sure how to accomplish this. Thankful for any help!
This is the code:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import random
import math
import os
batch_size = 100
epochs = 50
latent_dim = 100
class_dim = 10
gf_dim = 96
df_dim = 16
in_w = in_h = 64
c_dim = 3
device = 'cpu'
manualSeed = 3734
print("Random Seed: ",manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
transform = transforms.Compose([transforms.Resize((in_h,in_w)), transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,)),])
def transform_inverse (y):
(...)
def batch_transform_inverse(y):
(...)
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set,batch_size=batch_size, shuffle =True)
test_set = torchvision.datasets.CIFAR10 (root='./data',train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle = False)
train_iter = iter(train_loader)
test_iter = iter(test_loader)
real_batch, _ = next(iter(train_loader))
def conv_bn_layer(in_channels,out_channels,kernel_size,stride=1,padding=0):
return nn.Sequential(
nn.Conv2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding),
nn.BatchNorm2d(out_channels,momentum=0.1,eps=1e-5),)
def tconv_bn_layer(in_channels,out_channels,kernel_size,stride=1,padding=0,output_padding=0):
return nn.Sequential(
nn.ConvTranspose2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding,output_padding=output_padding),
nn.BatchNorm2d(out_channels,momentum=0.1,eps=1e-5),)
def tconv_layer(in_channels,out_channels,kernel_size,stride=1,padding=0,output_padding=0):
return nn.ConvTranspose2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding,output_padding=output_padding)
def conv_layer(in_channels,out_channels,kernel_size,stride=1,padding=0):
return nn.Conv2d(in_channels,out_channels,kernel_size,stride=stride,padding=padding)
def fc_layer(in_features,out_features):
return nn.Linear(in_features,out_features)
def fc_bn_layer(in_features,out_features):
return nn.Sequential(
nn.Linear(in_features,out_features),
nn.BatchNorm1d(out_features))
def conv_out_size_same(size, stride):
return int(math.ceil(float(size) / float(stride)))
s_h, s_w = in_h, in_w
s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
class Generator(nn.Module):
def __init__(self):
super(Generator,self).__init__()
self.fc_layer1 = fc_layer(latent_dim+class_dim,gf_dim*8)
self.up_sample_layer2 = tconv_bn_layer(gf_dim*8,gf_dim*4,4,2,0)
self.up_sample_layer3 = tconv_bn_layer(gf_dim*4,gf_dim*2,4,2,1)
self.up_sample_layer4 = tconv_bn_layer(gf_dim*2,gf_dim,4,2,1)
self.up_sample_layer5 = tconv_layer(gf_dim,c_dim,4,2,1)
self.tanh = nn.Tanh()
def forward(self, x):
x = F.relu(self.fc_layer1(x)).view(-1,gf_dim*8,1,1)
x = F.relu(self.up_sample_layer2(x))
x = F.relu(self.up_sample_layer3(x))
x = F.relu(self.up_sample_layer4(x))
x = self.up_sample_layer5(x)
return self.tanh(x)
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator,self).__init__()
self.conv_layer1 = conv_layer(c_dim,df_dim,3,2,1)
self.conv_layer2 = conv_bn_layer(df_dim,df_dim*2,3,1,1)
self.conv_layer3 = conv_bn_layer(df_dim*2,df_dim*4,3,2,1)
self.conv_layer4 = conv_bn_layer(df_dim*4,df_dim*8,3,1,1)
self.conv_layer5 = conv_bn_layer(df_dim*8,df_dim*16,3,2,1)
self.conv_layer6 = conv_bn_layer(df_dim*16,df_dim*32,3,1,1)
self.aux_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,class_dim) # cls
self.dis_fc_layer7 = fc_layer(df_dim*32*s_w8*s_h8,1) # fake/real
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=-1)
def forward(self, x):
x = F.leaky_relu(self.conv_layer1(x),0.2)
x = F.leaky_relu(self.conv_layer2(x),0.2)
x = F.leaky_relu(self.conv_layer3(x),0.2)
x = F.leaky_relu(self.conv_layer4(x),0.2)
x = F.leaky_relu(self.conv_layer5(x),0.2)
x = F.leaky_relu(self.conv_layer6(x),0.2)
x = x.view(-1,df_dim*32*s_w8*s_h8)
aux = self.aux_fc_layer7(x)
disc = self.dis_fc_layer7(x)
return self.softmax(aux), self.sigmoid(disc)
def weights_init(m):
(...)
G = Generator().to(device)
D = Discriminator().to(device)
bce_criterion = nn.BCELoss()
nll_criterion = nn.NLLLoss()
G_optimizer = optim.Adam(G.parameters(), lr=5e-4,betas=(0.5,0.999))
D_optimizer = optim.Adam(D.parameters(), lr=2e-4,betas=(0.5,0.999))
fixed_latent = torch.randn(100,latent_dim,device=device)
fixed_labels = torch.zeros(100,class_dim,device=device)
for j in range(10):
for i in range(class_dim):
fixed_labels[i*10+j][i]=1
fixed_noise=torch.cat((fixed_latent,fixed_labels),1)
with torch.no_grad():
fake_batch=G(fixed_noise)
def compute_cls_acc(m_disc,cls_labels):
return ((m_disc.argmax(dim=1) == cls_labels)*1.0).sum()/100
iter_per_plot = 10
plot_per_eps=(int(len(train_loader)/iter_per_plot))
transform_PIL=transforms.ToPILImage()
for ep in range(epochs):
for i, (data, cls_labels) in enumerate(train_loader):
b_size=data.shape[0]
data = data.to(device)
cls_labels = cls_labels.to(device)
cls_one_hot=torch.zeros(b_size,class_dim,device=device)
cls_one_hot[torch.arange(b_size), cls_labels] = 1.0
real_labels = torch.ones(b_size,1).to(device)
fake_labels = torch.zeros(b_size,1).to(device)
# with real
# Train D
D.zero_grad()
_ , real_score = m_disc, b_disc = D(data)
bce_real_loss = bce_criterion(b_disc,real_labels)
cls_real_loss = nll_criterion(m_disc,cls_labels)
real_cls_acc=compute_cls_acc(m_disc,cls_labels)#for logging
# with fake
latent_z = torch.randn(b_size,latent_dim).to(device)
latent_c = cls_one_hot
latent = torch.cat((latent_z,latent_c),dim=1)
fake = G(latent)
_, fake_score = m_disc, b_disc = D(fake.detach())
bce_fake_loss = bce_criterion(b_disc,fake_labels)
cls_fake_loss = nll_criterion(m_disc,cls_labels)
fake_cls_acc=compute_cls_acc(m_disc,cls_labels)#for logging
D_Ls = bce_real_loss + bce_fake_loss
D_Lc = 1.8*cls_real_loss + 0.2*cls_fake_loss # for learning stability
loss_D = D_Ls + D_Lc
loss_D.backward()
D_optimizer.step()
#Train G
G.zero_grad()
m_disc, b_disc = D(fake)
G_Ls = bce_criterion(b_disc,real_labels)
G_Lc = nll_criterion(m_disc,cls_labels)
loss_G = G_Ls + G_Lc
loss_G.backward()
G_optimizer.step()
if (i+1)%iter_per_plot == 0 :
print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}'.format(ep, epochs, i+1, len(train_loader), loss_D.item(), loss_G.item()))