Pytorch Unet Model not converging

import torch
import torch.nn as nn

class conv_block(nn.Module):
def init(self, in_c, out_c):
super().init()

    self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
    self.bn1 = nn.BatchNorm2d(out_c)

    self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn2 = nn.BatchNorm2d(out_c)
    self.conv3=nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn3= nn.BatchNorm2d(out_c)
    self.relu=nn.ReLU()

    self.conv4=nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn4= nn.BatchNorm2d(out_c)
    self.relu=nn.ReLU()

    self.conv5=nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn5= nn.BatchNorm2d(out_c)
    self.relu=nn.ReLU()
    self.conv6=nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn6= nn.BatchNorm2d(out_c)
    self.relu=nn.ReLU()

    self.conv7=nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
    self.bn7= nn.BatchNorm2d(out_c)
    self.relu=nn.ReLU()

  



    self.relu = nn.ReLU()

def forward(self, inputs):
    x = self.conv1(inputs)
    x = self.bn1(x)
    x = self.relu(x)

    x = self.conv2(x)
    x = self.bn2(x)
    x = self.relu(x)
    x=self.conv3(x)
    x=self.bn3(x)
    x=self.relu(x)

    x=self.conv4(x)
    x=self.bn4(x)
    x=self.relu(x)

    x=self.conv5(x)
    x=self.bn5(x)
    x=self.relu(x)
    x=self.conv6(x)
    x=self.bn6(x)
    x=self.relu(x)

    x=self.conv7(x)
    x=self.bn7(x)
    x=self.relu(x)

   

   
    return x

class encoder_block(nn.Module):
def init(self, in_c, out_c):
super().init()

    self.conv = conv_block(in_c, out_c)
    self.pool = nn.MaxPool2d((2, 2))

def forward(self, inputs):
    x = self.conv(inputs)
    p = self.pool(x)

    return x, p

class decoder_block(nn.Module):
def init(self, in_c, out_c):
super().init()

    self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
    self.conv = conv_block(out_c+out_c, out_c)

def forward(self, inputs, skip):
    x = self.up(inputs)
    if x.shape != skip.shape:
      x = TF.resize(x, size=skip.shape[2:])
    x = torch.cat([x, skip], axis=1)
    x = self.conv(x)
    return x

class build_unet(nn.Module):
def init(self):
super().init()

    """ Encoder """
    self.e1 = encoder_block(3, 64)
    self.e2 = encoder_block(64, 128)
    self.e3 = encoder_block(128, 256)
    self.e4 = encoder_block(256, 512)

    """ Bottleneck """
    self.b = conv_block(512, 1024)

    """ Decoder """
    self.d1 = decoder_block(1024, 512)
    self.d2 = decoder_block(512, 256)
    self.d3 = decoder_block(256, 128)
    self.d4 = decoder_block(128, 64)

    """ Classifier """
    self.outputs = nn.Conv2d(64, 1, kernel_size=1, padding=0)

def forward(self, inputs):
    """ Encoder """
    s1, p1 = self.e1(inputs)
    s2, p2 = self.e2(p1)
    s3, p3 = self.e3(p2)
    s4, p4 = self.e4(p3)

    """ Bottleneck """
    b = self.b(p4)

    """ Decoder """
    d1 = self.d1(b, s4)
    d2 = self.d2(d1, s3)
    d3 = self.d3(d2, s2)
    d4 = self.d4(d3, s1)

    outputs = self.outputs(d4)

    return outputs

if name == “main”:
x = torch.randn((2, 3, 512, 512))
f = build_unet()
y = f(x)
print(y.shape)

This is the code for the model input images greyscale 512X512 with mask labels

The loss fund is:
import torch.nn.functional as F

class DiceLoss(nn.Module):
def init(self, weight=None, size_average=True):
super(DiceLoss, self).init()

def forward(self, inputs, targets, smooth=1):

    #comment out if your model contains a sigmoid or equivalent activation layer
    inputs = torch.sigmoid(inputs)

    #flatten label and prediction tensors
    inputs = inputs.view(-1)
    targets = targets.view(-1)

    intersection = (inputs * targets).sum()
    dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)

    return 1 - dice

class DiceBCELoss(nn.Module):
def init(self, weight=None, size_average=True):
super(DiceBCELoss, self).init()

def forward(self, inputs, targets, smooth=1):

    #comment out if your model contains a sigmoid or equivalent activation layer
    inputs = torch.sigmoid(inputs)

    #flatten label and prediction tensors
    inputs = inputs.view(-1)
    targets = targets.view(-1)

    intersection = (inputs * targets).sum()
    dice_loss = 1 - (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)
    BCE = F.binary_cross_entropy(inputs, targets, reduction='mean')
    Dice_BCE = BCE + dice_loss

    return Dice_BCE

The code for training : from torch.utils.data import DataLoader
from glob import glob

def train(model, loader, optimizer, loss_fn, device):
epoch_loss = 0.0

model.train()
for x, y in loader:
    x = x.to(device, dtype=torch.float32)
    y = y.to(device, dtype=torch.float32)

    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()

epoch_loss = epoch_loss/len(loader)
return epoch_loss

def evaluate(model, loader, loss_fn, device):
epoch_loss = 0.0

model.eval()
with torch.no_grad():
    for x, y in loader:
        x = x.to(device, dtype=torch.float32)
        y = y.to(device, dtype=torch.float32)

        y_pred = model(x)
        
        loss = loss_fn(y_pred, y)
        epoch_loss += loss.item()
        

    epoch_loss = epoch_loss/len(loader)
return epoch_loss

if name == “main”:
“”" Seeding “”"
seeding(42)

""" Directories """
create_dir("files")

""" Load dataset """
train_x = sorted(glob("/content/drive/MyDrive/Data_brain/train/image/*")[:10])
train_y = sorted(glob("/content/drive/MyDrive/Data_brain/train/mask/*")[:10])

valid_x = sorted(glob("/content/drive/MyDrive/Data_brain/test/image/*")[:5])
valid_y = sorted(glob("/content/drive/MyDrive/Data_brain/test/mask/*")[:5])

data_str = f"Dataset Size:\nTrain: {len(train_x)} - Valid: {len(valid_x)}\n"
print(data_str)
""" Hyperparameters """

size = (512, 512)
batch_size = 2
num_epochs = 30
lr = 0.05
checkpoint_path = "files/checkpoint.pth"

train_dataset = DriveDataset(train_x, train_y)
valid_dataset = DriveDataset(valid_x, valid_y)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0
)

valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0
)

device = torch.device('cuda')  
model = build_unet()
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True)
loss_fn = DiceBCELoss()

best_valid_loss = float("inf")

val_loss=[]
trai_loss=[]
t=[]

for epoch in range(num_epochs):
    start_time = time.time()

    train_loss = train(model, train_loader, optimizer, loss_fn, device)
    valid_loss = evaluate(model, valid_loader, loss_fn, device)
    val_loss.append(valid_loss)
    trai_loss.append(train_loss)
    
    t.append(epoch)
    """ Saving the model """
    
    if valid_loss < best_valid_loss:
        data_str = f"Valid loss improved from {best_valid_loss:2.4f} to {valid_loss:2.4f}. Saving checkpoint: {checkpoint_path}"
        print(data_str)
        
        best_valid_loss = valid_loss
        
        
        torch.save(model.state_dict(), checkpoint_path)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    data_str = f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n'
    data_str += f'\tTrain Loss: {train_loss:.3f}\n'
    data_str += f'\t Val. Loss: {valid_loss:.3f}\n'
    print(data_str)







The model is not even overfitting training data