UNet segmenting only a few classes


(Preet Khaturia) #1

Hello, I am using UNet to classify my satellite images, and it only predicts some of the classes. Can anyone tell me what is wrong? I am also attaching some of the code.
@ptrblck

class SoftDiceLoss(nn.Module):
    """Soft Dice loss averaged over classes.

    Both inputs are expected to be laid out as ``[batch, classes, ...]``:
    per-class probabilities for the prediction and one binary mask per class
    for the target.

    Args:
        weight: Optional sequence/tensor with one non-negative weight per
            class. When given, the per-class Dice scores are combined as a
            weighted average (weights must not sum to zero), which lets rare
            classes contribute more. ``None`` gives a plain mean.
        size_average: Accepted for backward compatibility; the loss is
            always averaged over classes.
    """

    def __init__(self, weight=None, size_average=True):
        super(SoftDiceLoss, self).__init__()
        self.weight = weight
        self.size_average = size_average

    def forward(self, logits, targets):
        """Return ``1 - mean_c(dice_c)`` as a scalar tensor.

        Args:
            logits: Predictions, used as-is (no softmax/sigmoid is applied
                here, so pass probabilities despite the parameter name).
            targets: Per-class binary masks with the same shape as ``logits``.

        Raises:
            ValueError: If the two tensors do not have the same shape.
        """
        if logits.shape != targets.shape:
            raise ValueError(
                "logits and targets must have the same shape, got "
                f"{tuple(logits.shape)} and {tuple(targets.shape)}"
            )

        smooth = 1.0
        num_classes = targets.size(1)

        # Bring the class axis to the front before flattening. A plain
        # ``view(num_classes, -1)`` on a [B, C, ...] tensor would cut the flat
        # memory into C chunks that mix samples and classes whenever B > 1.
        probs = logits.transpose(0, 1).reshape(num_classes, -1)
        masks = targets.transpose(0, 1).reshape(num_classes, -1).to(probs.dtype)

        intersection = (probs * masks).sum(1)
        # The smoothing term is added once to numerator and denominator so
        # each per-class score stays within (0, 1] and the loss within [0, 1).
        dice = (2.0 * intersection + smooth) / (probs.sum(1) + masks.sum(1) + smooth)

        if self.weight is None:
            return 1 - dice.sum() / num_classes

        class_weights = torch.as_tensor(
            self.weight, dtype=dice.dtype, device=dice.device
        )
        return 1 - (class_weights * dice).sum() / class_weights.sum()
class CustomDataset(Dataset):
    """Dataset of multi-band rasters paired with integer label rasters.

    Each item is ``(image, masks)``: the image is min-max normalised per band
    and the label raster is one-hot encoded into ``num_classes`` binary masks.
    Both are converted with ``transforms.ToTensor()`` from float64 arrays in
    height/width/channel order.

    Args:
        image_paths: Sequence of paths to the input rasters.
        target_paths: Sequence of paths to the label rasters, aligned with
            ``image_paths`` by index.
        train: Stored on the instance; not otherwise used by this class.
        num_classes: Number of label classes for the one-hot encoding.
    """

    def __init__(self, image_paths, target_paths, train=True, num_classes=15):
        self.image_paths = image_paths
        self.target_paths = target_paths
        self.train = train
        self.num_classes = num_classes

        self.transforms = transforms.Compose([transforms.ToTensor()])
        self.transforms2 = transforms.Compose([transforms.ToTensor()])

    def __getitem__(self, index):
        """Load, normalise and tensorise the sample at ``index``."""
        image_ds = gdal.Open(self.image_paths[index], gdal.GA_ReadOnly)
        # ReadAsArray yields band-first data; move bands to the last axis.
        img = np.moveaxis(image_ds.ReadAsArray(), 0, -1).astype(np.float64)

        mask_ds = gdal.Open(self.target_paths[index], gdal.GA_ReadOnly)
        # One-hot encode straight to (H, W, num_classes); no fixed tile size
        # and no detour through PIL images is needed.
        mask = to_categorical(mask_ds.ReadAsArray(), self.num_classes)
        mask = mask.astype(np.float64)

        # Per-band min-max normalisation over the spatial axes.
        band_min = img.min(axis=(0, 1), keepdims=True)
        band_range = img.max(axis=(0, 1), keepdims=True) - band_min
        # A constant band has zero range; divide by 1 so it becomes all zeros
        # instead of NaN.
        band_range[band_range == 0] = 1.0
        img_norm = (img - band_min) / band_range

        t_image = self.transforms(img_norm)
        t_masks = self.transforms2(mask)
        return t_image, t_masks

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)
# One dataset per split; only the training split is flagged train=True.
train_dataset = CustomDataset(X_train, Y_train, train=True)
val_dataset = CustomDataset(X_val, Y_val, train=False)
test_dataset = CustomDataset(X_test, Y_test, train=False)

# Training batches are shuffled; validation and test keep file order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=1,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=1,
)
# The test split is served one sample at a time.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
)


class UNet(nn.Module):
    """Three-level U-Net variant that outputs per-class probabilities.

    Args:
        n_channels: Number of channels of the input passed to ``inconv``.
        n_classes: Number of output channels produced by ``outconv``.
    """

    def __init__(self, n_channels, n_classes):
        super().__init__()
        # Down path.
        self.inc = inconv(n_channels, 32)
        self.down1 = down(32, 64)
        self.down2 = down(64, 128)
        self.down3 = down(128, 128)

        # Up path; each stage is later called with two inputs.
        self.up1 = up(256, 64)
        self.up2 = up(128, 32)
        self.up3 = up(64, 32)

        self.outc = outconv(32, n_classes)
        # One shared dropout module (p=0.5), applied after every stage.
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
        """Run the network and return softmax probabilities over dim 1."""
        # Each stage output is passed through dropout before being reused,
        # so the tensors handed to the `up` stages are the dropped-out ones.
        level1 = self.drop(self.inc(x))
        level2 = self.drop(self.down1(level1))
        level3 = self.drop(self.down2(level2))
        bottom = self.drop(self.down3(level3))

        out = self.drop(self.up1(bottom, level3))
        out = self.drop(self.up2(out, level2))
        out = self.drop(self.up3(out, level1))

        scores = self.outc(out)
        return F.softmax(scores, dim=1)

def weights_init_uniform(m):
    """Initialise Linear-like modules with U(0, 1) weights and zero bias.

    Intended for ``model.apply(...)``. Only modules whose class name contains
    ``'Linear'`` are touched; every other module is left unchanged.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    classname = m.__class__.__name__
    if 'Linear' not in classname:
        return

    # apply a uniform distribution to the weights and a bias=0
    m.weight.data.uniform_(0.0, 1.0)
    # Layers built with bias=False expose `bias` as None; skip them.
    if getattr(m, 'bias', None) is not None:
        m.bias.data.fill_(0)
    
# 4 input channels, 15 output classes.
model = UNet(n_channels=4, n_classes=15)
# NOTE(review): weights_init_uniform only changes modules whose class name
# contains 'Linear' -- confirm the UNet building blocks include any.
model.apply(weights_init_uniform)

(Juan F Montesinos) #2

U-Net is not a network designed for classification; it is a segmentation network. You should use another architecture.


#3

I assume by “classifies some of the classes” you mean that your predicted segmentations ignore some classes completely?
If your target classes are imbalanced, you could try to weight the pixel values according to the number of pixels for the particular class.


(Preet Khaturia) #4

@ptrblck yes, your assumption is correct. Can you please explain how I can do that? Should I put weights in the loss function?

I am using dice loss as cost function.

class SoftDiceLoss(nn.Module):
    """Soft Dice loss averaged over classes.

    Both inputs are expected to be laid out as ``[batch, classes, ...]``:
    per-class probabilities for the prediction and one binary mask per class
    for the target.

    Args:
        weight: Optional sequence/tensor with one non-negative weight per
            class. When given, the per-class Dice scores are combined as a
            weighted average (weights must not sum to zero), which lets rare
            classes contribute more. ``None`` gives a plain mean.
        size_average: Accepted for backward compatibility; the loss is
            always averaged over classes.
    """

    def __init__(self, weight=None, size_average=True):
        super(SoftDiceLoss, self).__init__()
        self.weight = weight
        self.size_average = size_average

    def forward(self, logits, targets):
        """Return ``1 - mean_c(dice_c)`` as a scalar tensor.

        Args:
            logits: Predictions, used as-is (no softmax/sigmoid is applied
                here, so pass probabilities despite the parameter name).
            targets: Per-class binary masks with the same shape as ``logits``.

        Raises:
            ValueError: If the two tensors do not have the same shape.
        """
        if logits.shape != targets.shape:
            raise ValueError(
                "logits and targets must have the same shape, got "
                f"{tuple(logits.shape)} and {tuple(targets.shape)}"
            )

        smooth = 1.0
        num_classes = targets.size(1)

        # Bring the class axis to the front before flattening. A plain
        # ``view(num_classes, -1)`` on a [B, C, ...] tensor would cut the flat
        # memory into C chunks that mix samples and classes whenever B > 1.
        probs = logits.transpose(0, 1).reshape(num_classes, -1)
        masks = targets.transpose(0, 1).reshape(num_classes, -1).to(probs.dtype)

        intersection = (probs * masks).sum(1)
        # The smoothing term is added once to numerator and denominator so
        # each per-class score stays within (0, 1] and the loss within [0, 1).
        dice = (2.0 * intersection + smooth) / (probs.sum(1) + masks.sum(1) + smooth)

        if self.weight is None:
            return 1 - dice.sum() / num_classes

        class_weights = torch.as_tensor(
            self.weight, dtype=dice.dtype, device=dice.device
        )
        return 1 - (class_weights * dice).sum() / class_weights.sum()

#5

Could you explain a bit more how your dice loss is currently working?
I’m not sure I understand it completely.
Are you passing the logits and targets for each class into the function?
If not, how do the logits and targets correspond to each other?
I assume logits has a shape of [batch_size, nb_classes, h, w], while targets is [batch_size, h, w]. Is that correct?


(Preet Khaturia) #6

My target and logits both have the same shape. I have generated binary masks for the target corresponding to each class. And the Unet model outputs logits for each class.

Both have the shape --> [batch_size, nb_classes,h,w]