Hey guys, I have some problems with my neural network for 360-degree depth estimation. I use the UniFuse network from https://github.com/alibaba/UniFuse-Unidirectional-Fusion, but its maximum depth value isn't enough for my problem, so I modified the network and tried to train it with a maximum depth of 1000 m. I used the same dataset as in UniFuse plus the HoliCity dataset. I always get a warning that an inf or a NaN appeared. Maybe someone has a hint for me at this point?
After this failure I tried a pix2pix GAN for image-to-image translation to get a better solution, but this failed too! I got the same error/warning about inf and NaN values. I am a little bit helpless now. Can somebody help me?
You can find the code below. I mostly used the code from UniFuse to ensure that my model would work. I used pytorch==1.9.0 and torchvision==0.10.0.
diskriminator.py
import torch
import torch.nn as nn
import functools
class NLayerDiscriminator(nn.Module):
    """PatchGAN discriminator: classifies overlapping patches as real/fake."""

    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
        """Construct a PatchGAN discriminator.

        Parameters:
            input_nc (int)  -- the number of channels in input images
            ndf (int)       -- the number of filters in the last conv layer
            n_layers (int)  -- the number of conv layers in the discriminator
            norm_layer      -- normalization layer
        """
        super(NLayerDiscriminator, self).__init__()
        # BatchNorm2d has affine parameters, so the conv bias would be
        # redundant; only InstanceNorm2d needs an explicit bias term.
        if isinstance(norm_layer, functools.partial):
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d

        kernel, pad = 4, 1
        layers = [
            nn.Conv2d(input_nc, ndf, kernel_size=kernel, stride=2, padding=pad),
            nn.LeakyReLU(0.2, True),
        ]
        # Gradually widen the feature maps while halving the resolution,
        # capping the width multiplier at 8.
        mult = 1
        for n in range(1, n_layers):
            prev, mult = mult, min(2 ** n, 8)
            layers += [
                nn.Conv2d(ndf * prev, ndf * mult, kernel_size=kernel, stride=2, padding=pad, bias=use_bias),
                norm_layer(ndf * mult),
                nn.LeakyReLU(0.2, True),
            ]
        # One more conv at stride 1 before the prediction head.
        prev, mult = mult, min(2 ** n_layers, 8)
        layers += [
            nn.Conv2d(ndf * prev, ndf * mult, kernel_size=kernel, stride=1, padding=pad, bias=use_bias),
            norm_layer(ndf * mult),
            nn.LeakyReLU(0.2, True),
        ]
        # Output a one-channel prediction map (one logit per patch).
        layers.append(nn.Conv2d(ndf * mult, 1, kernel_size=kernel, stride=1, padding=pad))
        self.model = nn.Sequential(*layers)

    def forward(self, img_A, img_B):
        """Concatenate both images along channels and score the patches."""
        joined = torch.cat((img_A, img_B), dim=1)
        return self.model(joined)
class PixelDiscriminator(nn.Module):
    """1x1 PatchGAN discriminator (pixelGAN): one real/fake logit per pixel."""

    def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d):
        """Construct a 1x1 PatchGAN discriminator.

        Parameters:
            input_nc (int)  -- the number of channels in input images
            ndf (int)       -- the number of filters in the last conv layer
            norm_layer      -- normalization layer
        """
        super(PixelDiscriminator, self).__init__()
        # BatchNorm2d already has affine parameters, so skip the conv bias
        # unless the norm layer is InstanceNorm2d.
        if isinstance(norm_layer, functools.partial):
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        # All convolutions are 1x1/stride-1, so the spatial size is preserved.
        self.net = nn.Sequential(
            nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias),
            norm_layer(ndf * 2),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias),
        )

    def forward(self, img_A, img_B):
        """Concatenate both images along channels and score every pixel."""
        joined = torch.cat((img_A, img_B), dim=1)
        return self.net(joined)
class PatchGAN2_Diskriminator(nn.Module):
    """PatchGAN discriminator over a (condition, image) pair concatenated on channels."""

    def __init__(self, in_channels=3):
        super(PatchGAN2_Diskriminator, self).__init__()

        def down_block(c_in, c_out, normalization=True):
            """One downsampling stage: stride-2 conv, optional InstanceNorm, LeakyReLU."""
            stage = [nn.Conv2d(c_in, c_out, 4, stride=2, padding=1)]
            if normalization:
                stage.append(nn.InstanceNorm2d(c_out))
            stage.append(nn.LeakyReLU(0.2, inplace=True))
            return stage

        # Input has both images stacked, hence in_channels * 2.
        stages = []
        stages += down_block(in_channels * 2, 64, normalization=False)
        stages += down_block(64, 128)
        stages += down_block(128, 256)
        stages += down_block(256, 512)
        # Asymmetric zero-pad before the final conv, as in the pix2pix reference.
        stages.append(nn.ZeroPad2d((1, 0, 1, 0)))
        stages.append(nn.Conv2d(512, 1, 4, padding=1, bias=False))
        self.model = nn.Sequential(*stages)

    def forward(self, img_A, img_B):
        # Concatenate image and condition image by channels to produce input.
        joined = torch.cat((img_A, img_B), dim=1)
        return self.model(joined.float())
trainer.py: trainer for the normal (non-adversarial) training
from __future__ import absolute_import, division, print_function
import sys
sys.path.append("../networks")
import os
import numpy as np
import time
import json
import tqdm
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from tensorboardX import SummaryWriter
torch.manual_seed(100)
torch.cuda.manual_seed(100)
from myData import MyData
from unifuse import UniFuse
from metrics import compute_depth_metrics, Evaluator
from losses import BerhuLoss
class Trainer:
    """Supervised training loop for UniFuse 360-degree depth estimation.

    Builds train/val datasets and loaders, the UniFuse model, an Adam
    optimizer and tensorboard writers from a ``settings`` dict, then runs
    epochs of training/validation and periodically checkpoints to disk.
    """

    def __init__(self, settings):
        # settings: dict of hyper-parameters and paths; persisted verbatim
        # by save_settings() so the run can be reproduced.
        self.settings = settings
        if settings["cuda"]:
            self.device = torch.device("cuda")
            # NOTE(review): self.dtype is assigned but never used anywhere in
            # this class; the model runs in its default dtype. Confirm the
            # DoubleTensor here is dead code.
            self.dtype = torch.cuda.DoubleTensor
        else:
            self.device = torch.device("cpu")
            self.dtype = torch.DoubleTensor
        self.log_path = os.path.join(self.settings["log_dir"], self.settings["model_name"])
        train_dataset = MyData(settings["root_dir"], settings["train_file"], settings["color_augmentation"],
                               settings["lr_flip_augmentation"], settings["yaw_rotation_augmenation"], settings["is_training"],
                               settings["width"], settings["height"], settings["max_depth_meters"])
        # NOTE(review): the validation dataset is built with the same
        # augmentation and is_training flags as the training dataset — verify
        # that augmenting the validation split is intended.
        val_dataset = MyData(settings["root_dir"], settings["val_file"], settings["color_augmentation"],
                             settings["lr_flip_augmentation"], settings["yaw_rotation_augmenation"], settings["is_training"],
                             settings["width"], settings["height"], settings["max_depth_meters"])
        # Third positional DataLoader argument is shuffle: True for training.
        self.train_loader = DataLoader(train_dataset, settings["batch_size"], True,
                                       num_workers=self.settings["num_workers"], pin_memory=True, drop_last=True)
        num_train_samples = len(train_dataset)
        # Total number of optimizer steps over the whole run.
        self.num_total_steps = num_train_samples // self.settings["batch_size"] * self.settings["num_epochs"]
        self.val_loader = DataLoader(val_dataset, settings["batch_size"], False,
                                     num_workers=self.settings["num_workers"], pin_memory=True, drop_last=True)
        Net_dict = {"UniFuse": UniFuse}
        Net = Net_dict[self.settings["net"]]
        self.model = Net(self.settings["num_layers"], self.settings["height"], self.settings["width"],
                         self.settings["imagenet_pretrained"], self.settings["max_depth_meters"],
                         fusion_type=self.settings["fusion"], se_in_fusion=self.settings["se_in_fusion"])
        self.model.to(self.device)
        self.parameters_to_train = list(self.model.parameters())
        self.optimizer = optim.Adam(self.parameters_to_train, self.settings["learning_rate"])
        if self.settings["load_weights_dir"] is not None:
            self.load_model()
        print("Training model named:\n ", self.settings["model_name"])
        print("Models and tensorboard events files are saved to:\n", self.settings["log_dir"])
        print("Training is using:\n ", self.device)
        # BerHu (reverse Huber) loss on depth, masked by val_mask.
        self.compute_loss = BerhuLoss()
        self.evaluator = Evaluator()
        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))
        self.save_settings()

    def train(self):
        """
        Run the entire training pipeline: for each epoch, train then validate,
        and checkpoint every ``save_frequency`` epochs.
        """
        self.epoch = 0
        self.step = 0
        self.start_time = time.time()
        #self.validate()
        for self.epoch in range(self.settings["num_epochs"]):
            self.train_one_epoch()
            self.validate()
            if (self.epoch + 1) % self.settings["save_frequency"] == 0:
                self.save_model()

    def train_one_epoch(self):
        """
        Run a single epoch of training over the train loader.
        """
        self.model.train()
        pbar = tqdm.tqdm(self.train_loader)
        pbar.set_description("Model: {}\tTraining Epoch_{}".format(self.settings["model_name"], self.epoch))
        for batch_idx, inputs in enumerate(pbar):
            outputs, losses = self.process_batch(inputs)
            self.optimizer.zero_grad()
            losses["loss"].backward()
            self.optimizer.step()
            # log less frequently after the first 1000 steps to save time & disk space
            early_phase = batch_idx % self.settings["log_frequency"] == 0  #and self.step < 1000
            if early_phase:  # or late_phase:
                pred_depth = outputs["pred_depth"].detach()
                gt_depth = inputs["gt_depth"]
                mask = inputs["val_mask"]
                depth_errors = compute_depth_metrics(gt_depth, pred_depth, mask, max=self.settings["max_depth_meters"])
                # Attach each evaluator metric to the losses dict so log() writes it.
                for i, key in enumerate(self.evaluator.metrics.keys()):
                    losses[key] = np.array(depth_errors[i].cpu())
                self.log("train", inputs, outputs, losses)
            self.step += 1

    def process_batch(self, inputs):
        """Forward one batch through the model and compute the masked BerHu loss.

        Returns:
            (outputs, losses): model output dict and a dict holding "loss".
        """
        for key, ipt in inputs.items():
            # Raw rgb/cube_rgb stay on the CPU; they are only used for
            # tensorboard images in log().
            if key not in ["rgb", "cube_rgb"]:
                inputs[key] = ipt.to(self.device)
        losses = {}
        equi_inputs = inputs["normalized_rgb"]
        cube_inputs = inputs["normalized_cube_rgb"]
        outputs = self.model(equi_inputs, cube_inputs)
        losses["loss"] = self.compute_loss(inputs["gt_depth"],
                                           outputs["pred_depth"],
                                           inputs["val_mask"])
        return outputs, losses

    def log(self, mode, inputs, outputs, losses):
        """
        Write an event to the tensorboard events file.

        Args:
            mode: "train" or "val" (selects the writer).
            inputs: batch dict (rgb, cube_rgb, gt_depth, ...).
            outputs: model output dict (pred_depth, ...).
            losses: dict of scalar values to log.
        """
        writer = self.writers[mode]
        for l, v in losses.items():
            writer.add_scalar("{}".format(l), v, self.step)
        for j in range(min(4, self.settings["batch_size"])):  # write a maximum of four images
            writer.add_image("rgb/{}".format(j), inputs["rgb"][j].data, self.step)
            writer.add_image("cube_rgb/{}".format(j), inputs["cube_rgb"][j].data, self.step)
            # NOTE(review): dividing by the per-image max normalizes for
            # display, but 0/0 is NaN if the image is all zeros and the result
            # is non-finite if max is inf — a possible source of the reported
            # NaN/inf warnings.
            writer.add_image("gt_depth/{}".format(j),
                             inputs["gt_depth"][j].data / inputs["gt_depth"][j].data.max(), self.step)
            writer.add_image("pred_depth/{}".format(j),
                             outputs["pred_depth"][j].data / outputs["pred_depth"][j].data.max(), self.step)

    def validate(self):
        """
        Validate the model on the validation set and log the averaged metrics.
        """
        self.model.eval()
        self.evaluator.reset_eval_metrics()
        pbar = tqdm.tqdm(self.val_loader)
        pbar.set_description("Model: {}\tValidationEpoch_{}".format(self.settings["model_name"], self.epoch))
        with torch.no_grad():
            for batch_idx, inputs in enumerate(pbar):
                outputs, losses = self.process_batch(inputs)
                pred_depth = outputs["pred_depth"].detach()
                gt_depth = inputs["gt_depth"]
                mask = inputs["val_mask"]
                self.evaluator.compute_eval_metrics(gt_depth, pred_depth, mask)
        # Log the running averages accumulated by the evaluator; the last
        # batch's inputs/outputs provide the sample images.
        for i, key in enumerate(self.evaluator.metrics.keys()):
            losses[key] = np.array(self.evaluator.metrics[key].avg.cpu())
        self.log("val", inputs, outputs, losses)
        del inputs, outputs, losses

    def save_model(self):
        """
        Save model weights (plus a few settings entries) to disk.
        """
        save_folder = os.path.join(self.log_path, "models", "weights_{}".format(self.epoch))
        if not os.path.exists(save_folder):
            os.makedirs(save_folder)
        save_path = os.path.join(save_folder, "{}.pth".format("model"))
        to_save = self.model.state_dict()
        # NOTE(review): non-tensor settings entries are injected into the
        # state_dict; load_model() filters them back out via `k in model_dict`.
        to_save['layers'] = self.settings["num_layers"]
        to_save["height"] = self.settings["height"]
        to_save["width"] = self.settings["width"]
        #to_save["dataset"] = self.settings["dataset"]
        to_save["net"] = self.settings["net"]
        to_save['fusion'] = self.settings["fusion"]
        to_save['se_in_fusion'] = self.settings["se_in_fusion"]
        torch.save(to_save, save_path)
        save_path = os.path.join(save_folder, "{}.pth".format("adam"))
        torch.save(self.optimizer.state_dict(), save_path)

    def save_settings(self):
        """
        Save settings to disk so we know what we ran this experiment with.
        """
        models_dir = os.path.join(self.log_path, "models")
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)
        to_save = self.settings.copy()
        with open(os.path.join(models_dir, 'settings.json'), 'w') as f:
            json.dump(to_save, f, indent=2)

    def load_model(self):
        """
        Load model (and, if present, Adam optimizer) weights from disk.
        """
        self.settings["load_weights_dir"] = os.path.expanduser(self.settings["load_weights_dir"])
        assert os.path.isdir(self.settings["load_weights_dir"]), \
            "Cannot find folder {}".format(self.settings["load_weights_dir"])
        print("loading model from folder {}".format(self.settings["load_weights_dir"]))
        path = os.path.join(self.settings["load_weights_dir"], "{}.pth".format("model"))
        if os.path.isfile(path):
            model_dict = self.model.state_dict()
            pretrained_dict = torch.load(path)
            # Keep only entries that exist in the current model (this drops
            # the settings values stored by save_model()).
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            if self.settings["max_depth_meters"] is not None:
                # Override the checkpoint's max_depth with the current model's
                # so a different depth range can be used after loading.
                pretrained_dict["max_depth"] = self.model.max_depth
            model_dict.update(pretrained_dict)
            self.model.load_state_dict(model_dict)
        # loading adam state
        optimizer_load_path = os.path.join(self.settings["load_weights_dir"], "adam.pth")
        if os.path.isfile(optimizer_load_path):
            print("Loading Adam weights")
            optimizer_dict = torch.load(optimizer_load_path)
            self.optimizer.load_state_dict(optimizer_dict)
        else:
            print("Cannot find Adam weight so Adam is randomly initialized")
trainer_dis.py: trainer for the adversarial (GAN) training with the diskriminator
from __future__ import absolute_import, division, print_function
import sys
sys.path.append("../networks")
import os
import numpy as np
import time
import json
import tqdm
import torch
from torch.nn import BCEWithLogitsLoss
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from tensorboardX import SummaryWriter
torch.manual_seed(100)
torch.cuda.manual_seed(100)
from myData import MyData
from unifuse import UniFuse
from unifuse_norm import UniFuse as UniFuse_norm
from diskriminator import PatchGAN2_Diskriminator, NLayerDiscriminator, PixelDiscriminator
from metrics import compute_depth_metrics, Evaluator
from losses import BerhuLoss, L2Loss, L1Loss
from utils import *
class Trainer:
    """Adversarial (pix2pix-style) training: UniFuse generator vs. PatchGAN discriminator.

    The generator predicts a depth map from an equirectangular image (plus its
    cubemap projection); the discriminator judges (depth, rgb) pairs as real
    or fake. Checkpoints and tensorboard events are written under
    ``log_dir/model_name``.
    """

    def __init__(self, settings):
        """Build datasets, generator, discriminator, their optimizers and writers.

        Args:
            settings: dict of hyper-parameters and paths (persisted by save_settings()).
        """
        self.settings = settings
        if settings["cuda"]:
            self.device = torch.device("cuda")
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.device = torch.device("cpu")
            self.floatTensor = torch.FloatTensor
        self.log_path = os.path.join(self.settings["log_dir"], self.settings["model_name"])
        train_dataset = MyData(settings["root_dir"], settings["train_file"], settings["color_augmentation"],
                               settings["lr_flip_augmentation"], settings["yaw_rotation_augmenation"],
                               settings["is_training"],
                               settings["width"], settings["height"], settings["max_depth_meters"])
        # NOTE(review): the validation set uses the same augmentation and
        # is_training flags as the training set — confirm this is intended.
        val_dataset = MyData(settings["root_dir"], settings["val_file"], settings["color_augmentation"],
                             settings["lr_flip_augmentation"], settings["yaw_rotation_augmenation"],
                             settings["is_training"],
                             settings["width"], settings["height"], settings["max_depth_meters"])
        # Third positional DataLoader argument is shuffle: True for training.
        self.train_loader = DataLoader(train_dataset, settings["batch_size"], True,
                                       num_workers=self.settings["num_workers"], pin_memory=True, drop_last=True)
        num_train_samples = len(train_dataset)
        self.num_total_steps = num_train_samples // self.settings["batch_size"] * self.settings["num_epochs"]
        self.val_loader = DataLoader(val_dataset, settings["batch_size"], False,
                                     num_workers=self.settings["num_workers"], pin_memory=True, drop_last=True)
        # Generator
        Net_dict = {"UniFuse": UniFuse}
        Net = Net_dict[self.settings["net"]]
        self.generator = Net(self.settings["num_layers"], self.settings["height"], self.settings["width"],
                             self.settings["imagenet_pretrained"], self.settings["max_depth_meters"],
                             fusion_type=self.settings["fusion"], se_in_fusion=self.settings["se_in_fusion"])
        self.generator.to(self.device)
        self.parameters_to_train_gen = list(self.generator.parameters())
        # BUGFIX: the generator optimizer previously switched on
        # settings["optimizer_dis"] and used settings["learning_rate_dis"] in
        # the SGD branch — i.e. the *discriminator's* keys. It now uses the
        # generator's own "optimizer_gen" / "learning_rate" settings.
        if self.settings["optimizer_gen"] == "SGD":
            self.optimizer_gen = optim.SGD(self.parameters_to_train_gen, self.settings["learning_rate"])
        else:
            self.optimizer_gen = optim.Adam(self.parameters_to_train_gen, lr=self.settings["learning_rate"],
                                            betas=self.settings["generator_beta"])
        # Diskriminator
        Net_dis_dict = {"PatchGAN2": PatchGAN2_Diskriminator, "NLayerDiscriminator": NLayerDiscriminator,
                        "PixelDiscriminator": PixelDiscriminator}
        Net_dis = Net_dis_dict[self.settings["diskriminator"]]
        self.diskriminator = Net_dis(self.settings["dis_channel"])
        self.diskriminator.to(self.device)
        self.parameters_to_train_dis = list(self.diskriminator.parameters())
        # BUGFIX: the SGD/Adam switch for the discriminator previously tested
        # settings["optimizer_gen"]; it now tests "optimizer_dis".
        if self.settings["optimizer_dis"] == "SGD":
            self.optimizer_dis = optim.SGD(self.parameters_to_train_dis, self.settings["learning_rate_dis"])
        else:
            self.optimizer_dis = optim.Adam(self.parameters_to_train_dis, lr=self.settings["learning_rate_dis"],
                                            betas=self.settings["diskriminator_beta"])
        if self.settings["load_weights_dir"] is not None:
            self.load_model()
        print("Training model named:\n ", self.settings["model_name"])
        print("Models and tensorboard events files are saved to:\n", self.settings["log_dir"])
        print("Training is using:\n ", self.device)
        # adversial_criterion: pixel-wise reconstruction term (weighted by
        # settings["lambda"]); gan_criterion: real/fake classification term.
        loss_dict = {"L1Loss": L1Loss, "L2Loss": L2Loss, "BerhuLoss": BerhuLoss, "BCEWithLogitsLoss": BCEWithLogitsLoss}
        loss_adversial = loss_dict[self.settings["adversial_loss"]]
        loss_gan = loss_dict[self.settings["gan_loss"]]
        self.adversial_criterion = loss_adversial()
        self.gan_criterion = loss_gan()
        self.evaluator = Evaluator()
        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))
        self.save_settings()

    def load_model(self):
        """
        Load generator/discriminator weights and their optimizer states from disk.
        """
        self.settings["load_weights_dir"] = os.path.expanduser(self.settings["load_weights_dir"])
        assert os.path.isdir(self.settings["load_weights_dir"]), \
            "Cannot find folder {}".format(self.settings["load_weights_dir"])
        print("loading model from folder {}".format(self.settings["load_weights_dir"]))
        # Load generator
        path = os.path.join(self.settings["load_weights_dir"], "{}.pth".format("generator"))
        if os.path.isfile(path):
            model_dict = self.generator.state_dict()
            pretrained_dict = torch.load(path)
            # Drop the settings values that save_model() mixed into the checkpoint.
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            if self.settings["max_depth_meters"] is not None:
                # Keep the current model's max_depth instead of the checkpoint's.
                pretrained_dict["max_depth"] = self.generator.max_depth
            model_dict.update(pretrained_dict)
            self.generator.load_state_dict(model_dict)
        else:
            self.generator = init_net(self.generator)
            print("Cannot find Generator weights so weights are normal distributed initialized")
        # loading generator adam state
        optimizer_load_path = os.path.join(self.settings["load_weights_dir"], "adam_generator.pth")
        if os.path.isfile(optimizer_load_path):
            print("Loading Generator Adam weights")
            optimizer_dict = torch.load(optimizer_load_path)
            self.optimizer_gen.load_state_dict(optimizer_dict)
        else:
            print("Cannot find Generator Adam weight so Adam is randomly initialized")
        # Load discriminator
        path = os.path.join(self.settings["load_weights_dir"], "{}.pth".format("discriminator"))
        # BUGFIX: this previously tested os.path.isfile(optimizer_load_path)
        # (the generator's Adam file), so discriminator weights could be
        # skipped — or torch.load() could fail on a missing file.
        if os.path.isfile(path):
            model_dict = self.diskriminator.state_dict()
            pretrained_dict = torch.load(path)
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            model_dict.update(pretrained_dict)
            self.diskriminator.load_state_dict(model_dict)
        else:
            self.diskriminator = init_net(self.diskriminator)
            print("Cannot find Discriminator weights so weights are normal distributed initialized")
        # loading discriminator adam state
        optimizer_load_path = os.path.join(self.settings["load_weights_dir"], "adam_discriminator.pth")
        if os.path.isfile(optimizer_load_path):
            print("Loading Disrciminator Adam weights")
            optimizer_dict = torch.load(optimizer_load_path)
            self.optimizer_dis.load_state_dict(optimizer_dict)
        else:
            print("Cannot find Discriminator Adam weight so Adam is randomly initialized")

    def save_model(self):
        """
        Save generator/discriminator weights and optimizer states to disk.
        """
        save_folder = os.path.join(self.log_path, "models", "weights_{}".format(self.epoch))
        if not os.path.exists(save_folder):
            os.makedirs(save_folder)
        save_path = os.path.join(save_folder, "{}.pth".format("generator"))
        to_save = self.generator.state_dict()
        # NOTE(review): non-tensor settings entries are injected into the
        # state_dict; load_model() filters them back out via `k in model_dict`.
        to_save['layers'] = self.settings["num_layers"]
        to_save["height"] = self.settings["height"]
        to_save["width"] = self.settings["width"]
        to_save["max_depth_meters"] = self.settings["max_depth_meters"]
        to_save["net"] = self.settings["net"]
        to_save['fusion'] = self.settings["fusion"]
        to_save['se_in_fusion'] = self.settings["se_in_fusion"]
        to_save["learning_rate"] = self.settings["learning_rate"]
        to_save["generator_beta"] = self.settings["generator_beta"]
        to_save["optimizer_gen"] = self.settings["optimizer_gen"]
        torch.save(to_save, save_path)
        save_path = os.path.join(save_folder, "{}.pth".format("discriminator"))
        to_save = self.diskriminator.state_dict()
        to_save['dis_channel'] = self.settings["dis_channel"]
        to_save["lambda"] = self.settings["lambda"]
        to_save["diskriminator_beta"] = self.settings["diskriminator_beta"]
        to_save["learning_rate_dis"] = self.settings["learning_rate_dis"]
        to_save["diskriminator"] = self.settings["diskriminator"]
        to_save["optimizer_dis"] = self.settings["optimizer_dis"]
        torch.save(to_save, save_path)
        save_path = os.path.join(save_folder, "{}.pth".format("adam_generator"))
        torch.save(self.optimizer_gen.state_dict(), save_path)
        save_path = os.path.join(save_folder, "{}.pth".format("adam_discriminator"))
        torch.save(self.optimizer_dis.state_dict(), save_path)

    def save_settings(self):
        """
        Save settings to disk so we know what we ran this experiment with.
        """
        models_dir = os.path.join(self.log_path, "models")
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)
        to_save = self.settings.copy()
        with open(os.path.join(models_dir, 'settings.json'), 'w') as f:
            json.dump(to_save, f, indent=2)

    def train(self):
        """
        Run the entire training pipeline: for each epoch, train then validate,
        and checkpoint every ``save_frequency`` epochs.
        """
        self.epoch = 0
        self.step = 0
        self.start_time = time.time()
        #self.validate()
        for self.epoch in range(self.settings["num_epochs"]):
            self.train_one_epoch()
            self.validate()
            if (self.epoch + 1) % self.settings["save_frequency"] == 0:
                self.save_model()

    def train_one_epoch(self):
        """
        Run a single epoch of adversarial training over the train loader.
        """
        self.generator.train()
        self.diskriminator.train()
        pbar = tqdm.tqdm(self.train_loader)
        pbar.set_description("Model: {}\tTraining Epoch_{}".format(self.settings["model_name"], self.epoch))
        for idx, inputs in enumerate(pbar):
            outputs, losses = self.process_batch(inputs, isTrain=True)
            early_phase = idx % self.settings["log_frequency"] == 0
            if early_phase:
                pred_depth = outputs["pred_depth"].detach()
                gt_depth = inputs["gt_depth"]
                mask = inputs["val_mask"]
                depth_errors = compute_depth_metrics(gt_depth, pred_depth, mask, max=self.settings["max_depth_meters"])
                # Attach each evaluator metric to the losses dict so log() writes it.
                for i, key in enumerate(self.evaluator.metrics.keys()):
                    losses[key] = np.array(depth_errors[i].cpu())
                self.log("train", inputs, outputs, losses)
            self.step += 1

    def log(self, mode, inputs, outputs, losses):
        """
        Write an event to the tensorboard events file.

        Args:
            mode: "train" or "val" (selects the writer).
            inputs: batch dict (rgb, cube_rgb, gt_depth, ...).
            outputs: generator output dict (pred_depth, ...).
            losses: dict of scalar values to log.
        """
        writer = self.writers[mode]
        for l, v in losses.items():
            writer.add_scalar("{}".format(l), v, self.step)
        for j in range(min(4, self.settings["batch_size"])):  # write a maximum of four images
            writer.add_image("rgb/{}".format(j), inputs["rgb"][j].data, self.step)
            writer.add_image("cube_rgb/{}".format(j), inputs["cube_rgb"][j].data, self.step)
            # NOTE(review): 0/0 is NaN if an image is all zeros; the per-image
            # max normalization here can itself emit NaN/inf warnings.
            writer.add_image("gt_depth/{}".format(j),
                             inputs["gt_depth"][j].data / inputs["gt_depth"][j].data.max(), self.step)
            writer.add_image("pred_depth/{}".format(j),
                             outputs["pred_depth"][j].data / outputs["pred_depth"][j].data.max(), self.step)

    def validate(self):
        """
        Validate the generator on the validation set and log averaged metrics.
        """
        self.generator.eval()
        self.diskriminator.eval()
        self.evaluator.reset_eval_metrics()
        pbar = tqdm.tqdm(self.val_loader)
        pbar.set_description("Model: {}\tValidationEpoch_{}".format(self.settings["model_name"], self.epoch))
        with torch.no_grad():
            for idx, inputs in enumerate(pbar):
                outputs, losses = self.process_batch(inputs, isTrain=False)
                pred_depth = outputs["pred_depth"].detach()
                gt_depth = inputs["gt_depth"]
                mask = inputs["val_mask"]
                self.evaluator.compute_eval_metrics(gt_depth, pred_depth, mask)
        # Log the running averages accumulated by the evaluator; the last
        # batch's inputs/outputs provide the sample images.
        for i, key in enumerate(self.evaluator.metrics.keys()):
            losses[key] = np.array(self.evaluator.metrics[key].avg.cpu())
        self.log("val", inputs, outputs, losses)
        del inputs, outputs, losses

    def process_batch(self, inputs, isTrain):
        """Run generator and discriminator on one batch.

        Args:
            inputs: batch dict from MyData.
            isTrain: when True, also take optimizer steps for both networks.
        Returns:
            (outputs, losses)
        """
        for key, ipt in inputs.items():
            # Raw rgb/cube_rgb stay on the CPU; they are only used for
            # tensorboard images in log().
            if key not in ["rgb", "cube_rgb"]:
                inputs[key] = ipt.to(self.device)
        # Patch-level targets: 1.0 = real, 0.0 = fake; created as (1, 1) and
        # expand_as()'d to the discriminator's output map later.
        # (torch.full replaces the deprecated Variable(FloatTensor().fill_()).)
        real_gt = torch.full((1, 1), 1.0, device=self.device, requires_grad=False)
        fake_gt = torch.full((1, 1), 0.0, device=self.device, requires_grad=False)
        losses = {}
        # Train generator first, then discriminator (pix2pix ordering).
        losses, outputs = self.compute_generator(inputs, real_gt, losses, isTrain)
        losses = self.compute_diskriminator(inputs, outputs, real_gt, fake_gt, losses, isTrain)
        return outputs, losses

    def compute_diskriminator(self, inputs, outputs, real_gt, fake_gt, losses, isTrain):
        """Discriminator loss on (depth, rgb): fake = generator output, real = ground truth."""
        real_depth = inputs["gt_depth"]
        rgb = inputs["normalized_rgb"]
        # Debug traces for the reported inf/NaN problem.
        # NOTE(review): gt_depth is fed to the discriminator unmasked and in
        # metric scale (up to max_depth_meters, e.g. 1000) next to normalized
        # rgb; any invalid/inf depth pixels propagate directly into the
        # network — a likely source of the non-finite values. Consider
        # masking and normalizing the depth channel.
        if torch.any(~torch.isfinite(real_depth)):
            print("real_depth")
        if torch.any(~torch.isfinite(rgb)):
            print(rgb)
        # Loss for fake images (pred_depth detached so only D is updated here).
        fake_dis = self.diskriminator(outputs["pred_depth"].detach(), rgb)
        fake_gt = fake_gt.expand_as(fake_dis)
        d_fake_loss = self.gan_criterion(fake_dis, fake_gt)
        # Loss for real images
        real_dis = self.diskriminator(real_depth.float(), rgb)
        real_gt = real_gt.expand_as(real_dis)
        d_real_loss = self.gan_criterion(real_dis, real_gt)
        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2
        if torch.any(~torch.isfinite(d_loss)):
            print("d_loss")
        losses["discriminator_loss"] = d_loss
        if isTrain:
            self.optimizer_dis.zero_grad()
            d_loss.backward()
            self.optimizer_dis.step()
        return losses

    def compute_generator(self, inputs, real_gt, losses, isTrain):
        """Generator loss: GAN term (fool D) + lambda * pixel-wise reconstruction term."""
        equi_inputs = inputs["normalized_rgb"]
        cube_inputs = inputs["normalized_cube_rgb"]
        rgb = equi_inputs
        real_depth = inputs["gt_depth"]
        # generate fake image
        outputs = self.generator(equi_inputs, cube_inputs)
        if torch.any(~torch.isfinite(outputs["pred_depth"])):
            print("pred_depth")
        # verify if fake or real
        fake_out = self.diskriminator(outputs["pred_depth"], rgb)
        real_gt = real_gt.expand_as(fake_out)
        if torch.any(~torch.isfinite(fake_out)):
            print("fake_out")
        generator_loss = self.gan_criterion(fake_out, real_gt)
        # NOTE(review): trainer.py calls BerhuLoss as (gt, pred, mask) while
        # this reconstruction term is called as (pred, gt) with no val_mask.
        # If gt_depth contains invalid pixels, the unmasked loss can become
        # inf/NaN — align the argument order and apply the mask as in
        # trainer.py's process_batch().
        adversarial_loss = self.adversial_criterion(outputs["pred_depth"], real_depth)
        loss = generator_loss + (self.settings["lambda"] * adversarial_loss)
        if torch.any(~torch.isfinite(loss)):
            print("loss _generator")
            print(loss.type())
        losses["generator_loss"] = loss
        if isTrain:
            self.optimizer_gen.zero_grad()
            loss.backward()
            self.optimizer_gen.step()
        return losses, outputs