Here is my network
class Encoder(nn.Module):
def init(self, input_channels, output_channels, representation_size=64):
super(Encoder, self).init()
# input parameters
self.input_channels = input_channels
self.output_channels = output_channels
self.features = nn.Sequential(
# nc x 64 x 64
nn.Conv2d(self.input_channels, representation_size, 5, stride=2, padding=2),
nn.BatchNorm2d(representation_size),
nn.ReLU(),
# hidden_size x 32 x 32
nn.Conv2d(representation_size, representation_size * 2, 5, stride=2, padding=2),
nn.BatchNorm2d(representation_size * 2),
nn.ReLU(),
# hidden_size*2 x 16 x 16
nn.Conv2d(representation_size * 2, representation_size * 4, 5, stride=2, padding=2),
nn.BatchNorm2d(representation_size * 4),
nn.ReLU())
# hidden_size*4 x 8 x 8
self.mean = nn.Sequential(
nn.Linear(representation_size * 4 * 8 * 8, 2048),
nn.BatchNorm1d(2048),
nn.ReLU(),
nn.Linear(2048, output_channels))
self.logvar = nn.Sequential(
nn.Linear(representation_size * 4 * 8 * 8, 2048),
nn.BatchNorm1d(2048),
nn.ReLU(),
nn.Linear(2048, output_channels))
def forward(self, x):
batch_size = x.size()[0]
hidden_representation = self.features(x)
mean = self.mean(hidden_representation.view(batch_size, -1))
logvar = self.logvar(hidden_representation.view(batch_size, -1))
return mean, logvar
def hidden_layer(self, x):
batch_size = x.size()[0]
output = self.features(x)
return output
class Decoder(nn.Module):
def init(self, input_size, representation_size):
super(Decoder, self).init()
self.input_size = input_size
self.representation_size = representation_size
dim = representation_size[0] * representation_size[1] * representation_size[2]
self.preprocess = nn.Sequential(
nn.Linear(input_size, dim),
nn.BatchNorm1d(dim),
nn.ReLU())
# 256 x 8 x 8
self.deconv1 = nn.ConvTranspose2d(representation_size[0], 256, 5, stride=2, padding=2)
self.act1 = nn.Sequential(nn.BatchNorm2d(256),
nn.ReLU())
# 256 x 16 x 16
self.deconv2 = nn.ConvTranspose2d(256, 128, 5, stride=2, padding=2)
self.act2 = nn.Sequential(nn.BatchNorm2d(128),
nn.ReLU())
# 128 x 32 x 32
self.deconv3 = nn.ConvTranspose2d(128, 32, 5, stride=2, padding=2)
self.act3 = nn.Sequential(nn.BatchNorm2d(32),
nn.ReLU())
# 32 x 64 x 64
self.deconv4 = nn.ConvTranspose2d(32, 3, 5, stride=1, padding=2)
# 3 x 64 x 64
self.activation = nn.Tanh()
def forward(self, code):
bs = code.size()[0]
preprocessed_codes = self.preprocess(code)
preprocessed_codes = preprocessed_codes.view(-1,
self.representation_size[0],
self.representation_size[1],
self.representation_size[2])
output = self.deconv1(preprocessed_codes, output_size=(bs, 256, 16, 16))
output = self.act1(output)
output = self.deconv2(output, output_size=(bs, 128, 32, 32))
output = self.act2(output)
output = self.deconv3(output, output_size=(bs, 32, 64, 64))
output = self.act3(output)
output = self.deconv4(output, output_size=(bs, 3, 64, 64))
output = self.activation(output)
return output
class VAE_GAN_Generator(nn.Module):
def init(self, input_channels, hidden_size, representation_size=(256, 8, 8)):
super(VAE_GAN_Generator, self).init()
self.input_channels = input_channels
self.hidden_size = hidden_size
self.representation_size = representation_size
self.encoder = Encoder(input_channels, hidden_size)
self.decoder = Decoder(hidden_size, representation_size)
def forward(self, x):
batch_size = x.size()[0]
mean, logvar = self.encoder(x)
std = logvar.mul(0.5).exp_()
reparametrized_noise = Variable(torch.randn((batch_size, self.hidden_size))).cuda()
reparametrized_noise = mean + std * reparametrized_noise
rec_images = self.decoder(reparametrized_noise)
return mean, logvar, rec_images
class Discriminator(nn.Module):
def init(self, input_channels=3, representation_size=(256, 8, 8)):
super(Discriminator, self).init()
self.representation_size = representation_size
dim = representation_size[0] * representation_size[1] * representation_size[2]
self.main = nn.Sequential(
nn.Conv2d(input_channels, 32, 5, stride=1, padding=2),
nn.BatchNorm2d(32),
nn.LeakyReLU(0.2),
nn.Conv2d(32, 128, 5, stride=2, padding=2),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.2),
nn.Conv2d(128, 256, 5, stride=2, padding=2),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.2),
nn.Conv2d(256, 256, 5, stride=2, padding=2),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.2))
self.lth_features = nn.Sequential(
nn.Linear(dim, 2048),
nn.LeakyReLU(0.2))
self.sigmoid_output = nn.Sequential(
nn.Linear(2048, 1),
nn.Sigmoid())
def forward(self, x):
batch_size = x.size()[0]
features = self.main(x)
lth_rep = self.lth_features(features.view(batch_size, -1))
output = self.sigmoid_output(lth_rep)
return output
def similarity(self, x):
batch_size = x.size()[0]
features = self.main(x)
lth_rep = self.lth_features(features.view(batch_size, -1))
return lth_rep
Here is the training process:
G = VAE_GAN_Generator(input_channels, hidden_size).cuda()
D = Discriminator(input_channels).cuda()
criterion = nn.BCELoss()
criterion.cuda()
opt_enc = optim.RMSprop(G.encoder.parameters(), lr=lr)
opt_dec = optim.RMSprop(G.decoder.parameters(), lr=lr)
opt_dis = optim.RMSprop(D.parameters(), lr=lr * alpha)
for data, _ in dataloader:
batch_size = data.size()[0]
ones_label = Variable(torch.ones(batch_size)).cuda()
zeros_label = Variable(torch.zeros(batch_size)).cuda()
# print (data.size())
datav = Variable(data).cuda()
mean, logvar, rec_enc = G(datav)
# print ("The size of rec_enc:", rec_enc.size())
noisev = Variable(torch.randn(batch_size, hidden_size)).cuda()
rec_noise = G.decoder(noisev)
# train discriminator
output = D(datav)
errD_real = criterion(output, ones_label)
D_real_list.append(output.data.mean())
output = D(rec_enc)
errD_rec_enc = criterion(output, zeros_label)
D_rec_enc_list.append(output.data.mean())
output = D(rec_noise)
errD_rec_noise = criterion(output, zeros_label)
D_rec_noise_list.append(output.data.mean())
dis_img_loss = errD_real + errD_rec_enc + errD_rec_noise
# print ("print (dis_img_loss)", dis_img_loss)
D_list.append(dis_img_loss.data.mean())
opt_dis.zero_grad()
dis_img_loss.backward(retain_graph=True)
opt_dis.step()
# train decoder
output = D(datav)
errD_real = criterion(output, ones_label)
output = D(rec_enc)
errD_rec_enc = criterion(output, zeros_label)
output = D(rec_noise)
errD_rec_noise = criterion(output, zeros_label)
similarity_rec_enc = D.similarity(rec_enc)
similarity_data = D.similarity(datav)
dis_img_loss = errD_real + errD_rec_enc + errD_rec_noise
# print (dis_img_loss)
# gen_img_loss = -dis_img_loss
gen_img_loss = -dis_img_loss
g_loss_list.append(gen_img_loss.data.mean())
rec_loss = ((similarity_rec_enc - similarity_data) ** 2).mean()
rec_loss_list.append(rec_loss.data.mean())
err_dec = gamma * rec_loss + gen_img_loss
# train encoder
prior_loss = 1 + logvar - mean.pow(2) - logvar.exp()
prior_loss = (-0.5 * torch.sum(prior_loss)) / torch.numel(mean.data)
# print (prior_loss, mean, std)
prior_loss_list.append(prior_loss.data.mean())
# err_enc = prior_loss + beta * rec_loss
# err_enc= prior_loss #it's okay
opt_dec.zero_grad()
err_dec.backward(retain_graph=True)
opt_dec.step()
err_enc = rec_loss #
opt_enc.zero_grad()
err_enc.backward() #
# RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
# [torch.cuda.FloatTensor [32, 3, 5, 5]] is at version 2; expected version 1 instead. Hint: enable anomaly detection
# to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
opt_enc.step()
when I change the backward order of the err_dec and err_enc like this
err_enc = rec_loss # 报错
opt_enc.zero_grad()
err_enc.backward(retain_graph=True)
opt_enc.step()
opt_dec.zero_grad()
err_dec.backward()
# RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [2048, 64]]
opt_dec.step()
I guess the main problem is the rec_loss, but the same code work well with pytorch1.2, So I wonder how to make it work for pytorch1.5.1.