ReLU resulting in -inf elements in the tensor

Hi everyone!!

I am using the model architecture below to output a tensor with shape (1, 313, 64, 64), where the 313 channels are probability distributions for each pixel of the image/tensor. As the last layer, I am using a ReLU to avoid negative values in the output.

##################################################
############## MODEL ARCHITECTURE ##############
##################################################

import torch.nn as nn

class Color_model(nn.Module):
    def __init__(self):
        super(Color_model, self).__init__()
        self.features = nn.Sequential(
            # conv1
            nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, stride = 2, padding = 1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 64),
            # conv2
            nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 2, padding = 1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 128),
            # conv3
            nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, stride = 2, padding = 1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 256),
            # conv4
            nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 512),
            # conv5
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 512),
            # conv6
            nn.ReLU(),  # (extra ReLU right after the BatchNorm above; redundant but harmless)
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 2, dilation = 2),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 512),
            # conv7
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 1, dilation = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 1, dilation = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, stride = 1, padding = 1, dilation = 1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features = 512),
            # conv8
            nn.ConvTranspose2d(in_channels = 512, out_channels = 256, kernel_size = 4, stride = 2, padding = 1, dilation = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, stride = 1, padding = 1, dilation = 1),
            nn.ReLU(),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, stride = 1, padding = 1, dilation = 1),
            nn.ReLU(),
            # conv8_313
            nn.Conv2d(in_channels = 256, out_channels = 313, kernel_size = 1, stride = 1, dilation = 1),
            nn.ReLU(),
            # decoding
            #nn.Conv2d(in_channels = 313, out_channels = 2, kernel_size = 1, stride = 1)
        )
        self.apply(weights_init)  # weights_init is defined elsewhere in my code

    def forward(self, x):
        return self.features(x)
#######################################
############## TEST CODE ##############
#######################################
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import imageio
from PIL import Image
from skimage.color import rgb2lab, rgb2gray, lab2rgb
from torchvision import datasets, transforms
from model import Color_model
#from data_loader import ValImageFolder
from training_layers import decode
# note: scipy.misc.imsave was removed from SciPy; imageio replaces it here


scale_transform = transforms.Compose([
    transforms.Resize((64, 64), interpolation=Image.BILINEAR),  # the bare "2" was the PIL bilinear flag
    #transforms.RandomCrop(224),
])

def load_image(image_path, transform=None):
    rgb_image = Image.open(image_path).convert('RGB')

    if transform is not None:
        rgb_image = transform(rgb_image)  # previously, skipping the transform left the next line undefined

    rgb_image_resized = np.asarray(rgb_image)
    lab_image_resized = rgb2lab(rgb_image_resized)
    lab_image_resized = lab_image_resized.transpose(2, 0, 1)
    img_l_resized = lab_image_resized[0, :, :]                 # L channel
    img_l_resized = np.round(img_l_resized).astype(np.int64)   # np.int is deprecated in NumPy
    img_l_resized = torch.from_numpy(img_l_resized).unsqueeze(0)

    return img_l_resized

def main():
    data_dir = "\\Dataset\\test\\images\\"
    dirs=os.listdir(data_dir)
    print(dirs)
    color_model = Color_model().cuda().eval()
    color_model.load_state_dict(torch.load('\\models\\model-1-1.ckpt'))

    T = 0.38
    soft = nn.Softmax2d()
    Q_bins = np.load('\\code\\resources\\pts_in_hull.npy')
    #print(Q_bins.shape)
    #print(Q_bins) 
    
    for file in dirs:

        img_l_resized = load_image(os.path.join(data_dir, file), scale_transform)
        img_l_resized = img_l_resized.unsqueeze(0).float().cuda()

        with torch.no_grad():  # inference only, no gradients needed
            img_ab_313 = color_model(img_l_resized)

        img_ab_313 = img_ab_313.cpu()

        img_ab_313_log_t = torch.log10(img_ab_313) / T
        print(img_ab_313_log_t.max())

        soft_image_log_t = soft(img_ab_313_log_t)

When I get the output from the model (img_ab_313) and check its min() and max() values, the min() shows some -inf values that I don’t understand.

I am using this formula (img_ab_313_log_t = torch.log10(img_ab_313) / T) because it will be the input of the softmax() function, as described in a paper that I am trying to implement. See the formula below:

f_T(z) = exp(log(z)/T) / Σ_q exp(log(z_q)/T)

torch.log10() doesn’t accept negative values (the result is nan), so I inserted the ReLU at the end to avoid this… but I am still getting -inf in the output…
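To make the two failure modes concrete, here is a quick standalone check (plain torch, no model involved):

import torch
print(torch.log10(torch.tensor(-1.0)))  # tensor(nan)  -- log of a negative value
print(torch.log10(torch.tensor(0.0)))   # tensor(-inf) -- log of an exact zero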

Could someone help me with this? :worried:

Best regards,

Matheus Santos.

torch.log10 of a zero will result in -Inf, so you could add a small offset to img_ab_313 to avoid this. :wink:
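For example (a minimal sketch; the eps value here is an arbitrary small constant, not something from your code):

eps = 1e-8  # small offset so exact zeros from the ReLU don't map to -inf
img_ab_313_log_t = torch.log10(img_ab_313 + eps) / T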

1 Like

I see!! That’s true hahahah, I hadn’t thought of that. Thanks! :smile:

I was thinking about this…
I used ReLU at the end of the network to avoid negative values, since the log() of negative values results in nan. However, with ReLU, many values in the feature maps go to zero, and the log() of zero results in -inf. I avoided one issue and ran into another.

Is there any way to apply the log() to negative numbers?
Is it normal for negative values to appear in the feature maps?

Best regards,

Matheus Santos.

I think I know exactly which paper you are trying to implement, and the z distribution is already supposed to be activated with softmax. The annealing is just a way to rebalance the final distribution for each pixel, to avoid the desaturated effect of taking the mean color of a distribution.

And for the record, it’s completely expected to have negative values in feature maps, otherwise ReLU would have no effect.
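A quick toy check, if you want to convince yourself (the random tensor just stands in for a feature map):

x = torch.randn(1, 3, 4, 4)    # random "feature map": roughly half the entries are negative
print((x < 0).float().mean())  # ~0.5
print(torch.relu(x).min())     # tensor(0.) -- ReLU clamps all negatives to zero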

1 Like

Hey, thanks for replying!

Could you explain more about this? I think the annealing is used only at test time to predict the a,b channels. During training, the raw feature-map values are the input to the cross-entropy loss, which applies softmax() under the hood. So at test time, when I get the raw feature-map values, I need to convert them to probabilities by applying the annealing and then softmax(), but the problem is that there are negative values, and they break the log() in the annealing.

Makes total sense hahahah :smiley:

So at test time, when I get the raw feature-map values, I need to convert them to probabilities by applying the annealing and then softmax(), but the problem is that there are negative values, and they break the log() in the annealing.

First, you run the output through a softmax, and then you do the annealing. The point of the annealing is to take the predicted color distribution, which is unlikely to be sharp, and remap it into one with stronger peaks.
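In code, that ordering could look like the sketch below (using the 313-channel output and T from your snippet; softmax followed by exp(log(p)/T) plus renormalization is the same as raising the probabilities to the power 1/T):

probs = F.softmax(img_ab_313, dim=1)                     # raw network output -> distribution over the 313 bins
annealed = probs ** (1.0 / T)                            # equivalent to exp(log(probs) / T)
annealed = annealed / annealed.sum(dim=1, keepdim=True)  # renormalize so each pixel sums to 1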

1 Like

I understand now. I thought that first I applied the annealing to the raw values and then applied softmax(). I had interpreted the expression below that way ehehehe, but what you said makes sense.
f_T(z) = exp(log(z)/T) / Σ_q exp(log(z_q)/T)

By the way, could you help me with this error:

Traceback (most recent call last):
  File "/code/train.py", line 94, in <module>
    main(args)
  File "/code/train.py", line 46, in main
    encode_ab, max_encode_ab = encode_ab_layer.forward(img_ab)
  File "\code\training_layers.py", line 32, in forward
    encode=self.nnenc.encode_points_mtx_nd(x)
  File "\code\training_layers.py", line 213, in encode_points_mtx_nd
    self.pts_enc_flt[self.p_inds, inds] = wts
IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (65536,1) (49152,10)

I do not understand why this error occurs when I run training with the complete image dataset, but it did not occur when I trained with a subset of the dataset…

I think I found the problem: all the batches must have the same number of images. When I adjusted things so that every batch had the same number of images, the code ran correctly…

Is this a “must have”??

I thought that if I had 10 images and wanted batches of 6, 2 batches would be created: one with 6 images and the other with 4.
Do batches have to contain the same number of images, or can they contain different numbers, as in the case I described?

@futscdav @ptrblck

Essentially, there is no restriction that says all batches must be the same size. But some of the code you are using may have been written with that assumption. Either you can track down the error and change the assumption (could take some time and thinking) or you could set drop_last in your DataLoader to True, which will make sure all batches are the same size.
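For reference, a minimal sketch of that option (the dataset and batch size here are placeholders, not your actual training setup):

from torch.utils.data import DataLoader

# drop_last=True discards the final incomplete batch, so every batch
# the model sees has exactly batch_size samples.
loader = DataLoader(dataset, batch_size=6, shuffle=True, drop_last=True)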

1 Like

I see! I will track down the error and try to change it later.
I didn’t know about the drop_last strategy you mentioned; I will try that first.

Thanks for the reply! :smiley: