Solving class imbalance by implementing weighted cross entropy

Hi again!

This platform has helped me a lot in making progress on the school project I've been working on since last year. My project is a semantic segmentation model (61 classes, including the background). I am now trying to solve the problem I'm currently facing, which is class imbalance. One suggestion I got here on the forum was to use weighted cross entropy. I have been trying to implement this, but I still get the same output (my model tends to classify everything as background). Today I tried another approach to weighted cross entropy that I also read about on this platform (Passing the weights to CrossEntropyLoss correctly - PyTorch Forums), but I got an error. Can anyone help me understand what this error means, and how I might solve it? This is my code snippet for the training part:

weights = [1200, 43, 2, 6, 1, 6, 225, 74, 185, 55, 15, 20, 10, 151, 3, 79, 9, 41, 41, 27, 3, 62, 91, 11, 4, 0, 7, 4, 69, 6, 171, 5, 3, 11, 68, 23, 0, 310, 4, 20, 22, 49, 3, 35, 9, 4, 37, 13, 2, 4, 29, 71, 28, 12, 6, 6, 110, 4, 78, 269, 227]
normedWeights = [1-(x/sum(weights)) for x in weights]
print(normedWeights)
class_weights = torch.FloatTensor(normedWeights).to(device)
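Note that with 61 classes each x/sum(weights) term is small, so the 1 - x/sum scheme leaves every weight close to 1.0 and reweights only mildly. A stronger, common alternative is plain inverse class frequency; a minimal sketch, assuming the weights list above holds raw per-class pixel counts:

counts = torch.tensor(weights, dtype=torch.float)
# Rarer classes get larger weights; zero-count classes get weight 0 to avoid inf.
inv_freq = torch.where(counts > 0, counts.sum() / counts, torch.zeros_like(counts))
class_weights_alt = (inv_freq / inv_freq.sum()).to(device)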

import torch.nn as nn
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_function = nn.CrossEntropyLoss(weight=class_weights, reduction='none')

from openpyxl import load_workbook
from tqdm import tqdm
import time

BATCH_SIZE = 4
EPOCHS = 10

wb = load_workbook("/content/drive/MyDrive/Thesis_Quilang/Epoch_loss.xlsx")
ws = wb["Sheet1"]

def train(model):
  model.train()
  for epoch in range(EPOCHS):
      for i in tqdm(range(0, len(img_train), BATCH_SIZE)): 
          batch_img_train = img_train[i:i+BATCH_SIZE].view(-1, 3, 224, 224)
          batch_mask_train = mask_train[i:i+BATCH_SIZE].view(-1, 1, 224, 224)
        
          model.zero_grad()

          outputs = model(batch_img_train)
        
          loss = loss_function(outputs, batch_mask_train.squeeze(1).long())
          loss.backward()
          optimizer.step()    # Does the update

      loss_value = round(loss.item(), 3)
      wcell1 = ws.cell(epoch+2, 1)
      wcell1.value = f"{epoch}"
      wcell2 = ws.cell(epoch+2, 2)
      wcell2.value = f"{loss_value}"

      print(f"Epoch: {epoch}, Loss: {loss}")

  Model_Name = f"{loss_value}-model"
  wcell_header1 = ws.cell(1,1)
  wcell_header1.value = 'Epoch'
  wcell_header2 = ws.cell(1,2 )
  wcell_header2.value = 'Loss Value'
  ws.title = Model_Name
  wb.save("/content/drive/MyDrive/Thesis_Quilang/Epoch_loss.xlsx")

  
  path = F"/content/drive/MyDrive/Thesis_Quilang/{Model_Name}.pt" 
  torch.save(model.state_dict(), path)


      
train(model)


And this is the error:

  0%|          | 0/338 [00:01<?, ?it/s]
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-28-55655fafc090> in <module>()
     45 
     46 
---> 47 train(model)

3 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in _make_grads(outputs, grads)
     48             if out.requires_grad:
     49                 if out.numel() != 1:
---> 50                     raise RuntimeError("grad can be implicitly created only for scalar outputs")
     51                 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
     52             else:

RuntimeError: grad can be implicitly created only for scalar outputs

Gradients in PyTorch can be implicitly created only for scalar outputs. Your loss is not a scalar because you used reduction='none'. Either use reduction='mean' or reduction='sum', or just compute the mean yourself with

loss = loss.mean()

before calling backward.
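If you do want to keep reduction='none' and normalize the weighting yourself, note that reduction='mean' with class weights divides the summed loss by the sum of the per-target weights, not by the pixel count. A minimal sketch of that manual reduction (here target stands for the (N, H, W) tensor of class indices, i.e. batch_mask_train.squeeze(1).long() in your loop):

criterion = nn.CrossEntropyLoss(weight=class_weights, reduction='none')
per_pixel = criterion(outputs, target)  # shape (N, H, W), already weighted per pixel
loss = per_pixel.sum() / class_weights[target].sum()  # weighted average, matching reduction='mean'
loss.backward()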

Hi! The reason I used reduction='none' is that I read in one of the threads here that if you normalize the class weights yourself, you should use 'none' as the reduction. But I will try your suggestion of using loss.mean(). Thank you so much!

Hi everyone! I've now implemented weighted cross entropy to try to solve my class imbalance issue, but my network's segmentation output is still ALL BACKGROUND. Where could I have made my mistake? The loss value is already small.

weights = [1200, 43, 2, 6, 1, 6, 225, 74, 185, 55, 15, 20, 10, 151, 3, 79, 9, 41, 41, 27, 3, 62, 91, 11, 4, 0, 7, 4, 69, 6, 171, 5, 3, 11, 68, 23, 0, 310, 4, 20, 22, 49, 3, 35, 9, 4, 37, 13, 2, 4, 29, 71, 28, 12, 6, 6, 110, 4, 78, 269, 227]
normedWeights = [1-(x/sum(weights)) for x in weights]
print(normedWeights)
class_weights = torch.FloatTensor(normedWeights).to(device)

import torch.nn as nn
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_function = nn.CrossEntropyLoss(weight=class_weights, reduction='none')

from openpyxl import load_workbook
from tqdm import tqdm
import time

BATCH_SIZE = 4
EPOCHS = 10

wb = load_workbook("/content/drive/MyDrive/Thesis_Quilang/Epoch_loss.xlsx")
ws = wb["Sheet1"]

def train(model):
  model.train()
  for epoch in range(EPOCHS):
      for i in tqdm(range(0, len(img_train), BATCH_SIZE)): 
          batch_img_train = img_train[i:i+BATCH_SIZE].view(-1, 3, 224, 224)
          batch_mask_train = mask_train[i:i+BATCH_SIZE].view(-1, 1, 224, 224)
        
          model.zero_grad()

          outputs = model(batch_img_train)
        
          loss = loss_function(outputs, batch_mask_train.squeeze(1).long())
          loss = loss.mean()
          loss.backward()
          optimizer.step()    # Does the update

      loss_value = round(loss.item(), 3)  # loss of the last batch in this epoch
      wcell1 = ws.cell(epoch+2, 1)
      wcell1.value = f"{epoch}"
      wcell2 = ws.cell(epoch+2, 2)
      wcell2.value = f"{loss_value}"

      print(f"Epoch: {epoch}, Loss: {loss}")

  Model_Name = f"{loss_value}-model"
  wcell_header1 = ws.cell(1,1)
  wcell_header1.value = 'Epoch'
  wcell_header2 = ws.cell(1,2 )
  wcell_header2.value = 'Loss Value'
  ws.title = Model_Name
  wb.save("/content/drive/MyDrive/Thesis_Quilang/Epoch_loss.xlsx")

  
  path = F"/content/drive/MyDrive/Thesis_Quilang/{Model_Name}.pt" 
  torch.save(model.state_dict(), path)


      
train(model)
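One caveat about the logging above: loss_value records only the final batch of each epoch, which is noisy. A sketch of averaging over the whole epoch instead, reusing the same loop (running_loss and num_batches are hypothetical accumulators, reset at the start of each epoch):

running_loss, num_batches = 0.0, 0
for i in tqdm(range(0, len(img_train), BATCH_SIZE)):
    batch_img_train = img_train[i:i+BATCH_SIZE].view(-1, 3, 224, 224)
    batch_mask_train = mask_train[i:i+BATCH_SIZE].view(-1, 1, 224, 224)

    model.zero_grad()
    outputs = model(batch_img_train)
    loss = loss_function(outputs, batch_mask_train.squeeze(1).long()).mean()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    num_batches += 1

loss_value = round(running_loss / num_batches, 3)  # epoch average, not last batch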


def sample_predict():

  trained_model = ESNet(classes=61)
  path = F"/content/drive/MyDrive/Thesis_Quilang/0.043-model.pt" 
  trained_model.load_state_dict(torch.load(path))

  trained_model.eval()
  with torch.no_grad():
      net_out = trained_model(img_train[299].view(-1, 3, 224, 224))
  print(net_out)

  # Take the argmax over the class dimension (dim=1 for N x C x H x W logits);
  # dim=0 would index the size-1 batch dimension and return 0 everywhere.
  om = torch.argmax(net_out, dim=1).cpu()
  print(om)
  print(np.unique(om))

  return net_out

target = sample_predict()
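
A quick sanity check for the ALL BACKGROUND symptom is to count how many pixels each class receives after the argmax; if only class 0 appears, the collapse is real rather than a display artifact. A minimal sketch, assuming om is the predicted class-index map from sample_predict above:

pixel_counts = torch.bincount(om.flatten(), minlength=61)
for cls, count in enumerate(pixel_counts.tolist()):
    if count > 0:
        print(f"class {cls}: {count} pixels")  # only classes that actually appear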