Why is my train_loss = 0.0000?

Hello, I am currently building a convolutional autoencoder with 2 inputs. I am using an MSE loss, but my train loss stays at 0.00. Many thanks for your support.

You will find my code below:

class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        ## encoder layers ##
        # conv layer (depth from 3 --> 64), 3x3 kernels
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        # conv layer (depth from 64 --> 3), 3x3 kernels
        self.conv2 = nn.Conv2d(64, 3, 3, padding=1)
        # pooling layer to reduce x-y dims by two; kernel and stride of 2
        self.pool = nn.MaxPool2d(2, 2)

        ## decoder layers ##
        ## a stride of 2 will increase the spatial dims by 2
        self.t_conv1 = nn.ConvTranspose2d(3, 64, 3, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(64, 3, 3, stride=2)

    def forward(self, x1, x2):
        ## encode ##
        # add hidden layers with relu activation function
        # and maxpooling after
        x1 = F.relu(self.conv1(x1))
        x1 = self.pool(x1)
        # add second hidden layer
        x1 = F.relu(self.conv2(x1))
        x1 = self.pool(x1)  # compressed representation

        ## decode ##
        # add transpose conv layers, with relu activation function
        x1 = F.relu(self.t_conv1(x1))
        # output layer (with sigmoid for scaling from 0 to 1)
        x1 = F.sigmoid(self.t_conv2(x1))

        ## encode ##
        # add hidden layers with relu activation function
        # and maxpooling after
        x2 = F.relu(self.conv1(x2))
        x1 = self.pool(x2)
        # add second hidden layer
        x2 = F.relu(self.conv2(x2))
        x2 = self.pool(x2)  # compressed representation

        ## decode ##
        # add transpose conv layers, with relu activation function
        x2 = F.relu(self.t_conv1(x2))
        # output layer (with sigmoid for scaling from 0 to 1)
        x2 = F.sigmoid(self.t_conv2(x2))

        return x1, x2

class CustomDataset(Dataset):

    # 2. Initialize with a targ_dir and an optional transform parameter
    def __init__(self, targ_dir: str, transform=None) -> None:
        self.paths_pre = list(pathlib.Path(targ_dir).glob("*/*.png"))
        self.paths_post = list(pathlib.Path(targ_dir).glob("*/*.png"))

    # 3. Overwrite the __len__() method (optional but recommended for subclasses of torch.utils.data.Dataset)
    def __len__(self) -> int:
        "Returns the total number of samples."
        return len(self.paths_pre)

    # 4. Make function to load images
    def __getitem__(self, index: int) -> Image.Image:
        "Opens a pair of images via their paths and returns them."
        image_path_pre = self.paths_pre[index]
        image_path_post = self.paths_post[index]

        img_pre = Image.open(image_path_pre).convert('RGB')
        img_pre = transforms.ToTensor()(img_pre)

        img_post = Image.open(image_path_post).convert('RGB')
        img_post = transforms.ToTensor()(img_post)

        return img_pre, img_post

Transforms = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        # random vertical translation (fraction of image height)
        transforms.RandomAffine(0, translate=(0, 1)),
        transforms.ToTensor()
    ]
)

Train = CustomDataset('/localhome/euda_je/data/test/OmbriaS1/train', transform=Transforms)
Val = CustomDataset('/localhome/euda_je/data/test/OmbriaS1/test', transform=Transforms)
dataloader_train = DataLoader(Train, batch_size=2, shuffle=True)
dataloader_val = DataLoader(Val, batch_size=2, shuffle=True)

class MSEReconLoss(nn.Module):
    def __init__(self):
        super(MSEReconLoss, self).__init__()

    def forward(self, input1, input2):
        mse_loss = nn.MSELoss()
        output1 = mse_loss(input1, input1)
        output2 = mse_loss(input2, input2)

        return output1, output2

def training(epochs):

    Model.train()
    epoch_loss = 0.0
    running_loss_train = 0.0

    for i, data in tqdm(enumerate(dataloader_train, 0)):

        img_pre, img_post = data

        img_pre = img_pre.to(device)    # move to device, e.g. GPU
        img_post = img_post.to(device)

        # Clear the gradients
        optimizer.zero_grad()

        recon1, recon2 = Model(img_pre, img_post)

        reconstruction1 = criterion(recon1, img_pre)
        reconstruction2 = criterion(recon2, img_post)

        # Calculate the average of reconstruction1 and reconstruction2 separately.
        reconstruction1_avg = sum(reconstruction1) / len(reconstruction1)
        reconstruction2_avg = sum(reconstruction2) / len(reconstruction2)

        # Calculate the overall average of reconstruction1 and reconstruction2.
        loss_reconstruction = (reconstruction1_avg + reconstruction2_avg) / 2

        loss_reconstruction.backward()
        optimizer.step()
        running_loss_train += loss_reconstruction.item()

    epoch_loss += running_loss_train / len(dataloader_train)

    train_loss.append(epoch_loss)

    #precision_train.append(Precision_train)
    #recall_train.append(Recall_train)
    #f1_score_train.append(F1_score_train)

    #torch.save(Model, 'model_UNet_Assement.pt')

    print(
        f"Epoch : {epoch+1} - train_loss : {epoch_loss:.4f}  \n")

    return

Your custom loss class is calculating the loss between input1 and itself, as well as input2 and itself. The loss between anything and itself is always going to be zero. I haven't looked too closely at your code, so I'm not sure what you are trying to do here, but this is going to be the culprit for the zero loss.
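If the goal is the MSE between each reconstruction and its corresponding input, something along these lines is probably what you're after (just a sketch; the four-argument signature is my own choice, not taken from your code):

    class MSEReconLoss(nn.Module):
        def __init__(self):
            super(MSEReconLoss, self).__init__()
            self.mse_loss = nn.MSELoss()

        def forward(self, recon1, target1, recon2, target2):
            # compare each reconstruction to its corresponding target, not to itself
            loss1 = self.mse_loss(recon1, target1)
            loss2 = self.mse_loss(recon2, target2)
            return loss1, loss2

You can then average the two returned losses (or sum them) before calling backward().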

Hello, thank you. Yes, I was using the wrong loss; I switched to criterion = nn.MSELoss(),
but now I get a weird message:

RuntimeError: The size of tensor a (64) must match the size of tensor b (3) at non-singleton dimension 1

Can you share the full error message? Is it pointing to your loss function call? If you are trying to calculate the loss between two images of different sizes, that’s likely your issue.
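For instance, a quick way to narrow it down is to print the shapes right before the loss calls (a debugging sketch using the variable names from your training loop):

    # quick shape check before computing the losses
    print(recon1.shape, img_pre.shape)
    print(recon2.shape, img_post.shape)

    reconstruction1 = criterion(recon1, img_pre)
    reconstruction2 = criterion(recon2, img_post)

The two tensors passed to each criterion call should have identical shapes.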

Thank you. Here are the shapes:
torch.Size([2, 64, 256, 256])
torch.Size([2, 3, 256, 256])
torch.Size([2, 3, 256, 256])
torch.Size([2, 3, 256, 256])


RuntimeError                              Traceback (most recent call last)
Cell In[26], line 3
      1 epochs=2
      2 for epoch in range(1,epochs+1):
----> 3     training(epoch)

Cell In[25], line 59, in training(epochs)
     53 #reconstruction1 = criterion(recon1,img_pre)
     56 reconstruction2 = criterion(recon2,img_post)
---> 59 reconstruction1 = criterion(recon1,img_pre)
     61 # Calculate the average of reconstruction1 and reconstruction2 separately.
     62 reconstruction1_avg = sum(reconstruction1) / len(reconstruction1)

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/loss.py:536, in MSELoss.forward(self, input, target)
    535 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 536     return F.mse_loss(input, target, reduction=self.reduction)

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/functional.py:3294, in mse_loss(input, target, size_average, reduce, reduction)
   3291 if size_average is not None or reduce is not None:
   3292     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3294 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
   3295 return torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))

File ~/miniconda3/lib/python3.10/site-packages/torch/functional.py:74, in broadcast_tensors(*tensors)
     72 if has_torch_function(tensors):
     73     return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 74 return _VF.broadcast_tensors(tensors)

RuntimeError: The size of tensor a (64) must match the size of tensor b (3) at non-singleton dimension 1
There is a size issue and I don't know why; I am using two inputs.

This may simply be a copy-paste error. I just noticed that in your model definition you have one rogue x1 line in the encoder portion of x2. This would indeed cause your x1 output to be a different size than expected, i.e. a different size than the original input.
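Concretely, in the x2 branch of forward the first pooling result is assigned to x1, which overwrites the x1 reconstruction with an intermediate 64-channel feature map (hence the 64 vs 3 mismatch at dimension 1) and also skips that pooling step for x2. A sketch of the corrected branch, with only that assignment changed:

    ## encode ##
    x2 = F.relu(self.conv1(x2))
    x2 = self.pool(x2)  # was: x1 = self.pool(x2)  <-- the rogue line
    # add second hidden layer
    x2 = F.relu(self.conv2(x2))
    x2 = self.pool(x2)  # compressed representation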