Zero loss values for resnet segmentation

I am training a resnet model for image segmentation (two classes, image and binary mask for training). While my accuracy values are very high (~0.99), both my training and validation losses are zeroes throughout all epochs. What might be the issue? I have attached a section of my code for training the model below, thank you in advance!

for epoch in range(num_epochs):
    #zero out epoch based performance variables 
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    all_acc = {key: 0 for key in phases} 
    all_loss = {key: torch.zeros(0).to(device) for key in phases}
    cmatrix = {key: np.zeros((2,2)) for key in phases}

    for phase in phases: #iterate through both training and validation states

        if phase == 'train':
            model.train()  # Set model to training mode
        else: #when in eval mode, we don't want parameters to be updated
            model.eval()   # Set model to evaluate mode
        running_loss = 0.0
        running_corrects = 0
        for ii , (X, y, y_weight) in enumerate(dataLoader[phase]): #for each of the batches
            optim.zero_grad()
            X = X.to(device)  # [Nbatch, 3, H, W]
            y_weight = y_weight.type('torch.FloatTensor').to(device)
            y = y.type('torch.LongTensor').to(device)  # [Nbatch, H, W] with class indices (0, 1)
            with torch.set_grad_enabled(phase == 'train'): #dynamically set gradient computation, in case of validation, this isn't needed
                                                            #disabling is good practice and improves inference time

                prediction = model_ft(X)  # [N, Nclass]
                y = y[:, 0,0]
                loss = criterion(prediction, y)
                _, preds = torch.max(X, 1)
                preds = preds[:,0,0]
                if phase=="train": #in case we're in train mode, need to do back propogation
                    
                    loss.mean().backward()
                    optim.step()
                
                #all_loss[phase]=torch.cat((all_loss[phase],loss.detach().view(1,-1)))

                
        #all_acc[phase]=(cmatrix[phase]/cmatrix[phase].sum()).trace()
        #all_loss[phase] = all_loss[phase].cpu().numpy().mean()
            running_loss += loss.data[0]
            running_corrects += torch.sum(preds == y)
        epoch_loss = running_loss / len(dataLoader[phase].dataset)
        epoch_acc = running_corrects.double() / len(dataLoader[phase].dataset)

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        # deep copy the model
        if phase == 'val' and epoch_acc > best_acc:
           best_acc = epoch_acc
           best_model_wts = copy.deepcopy(model.state_dict())
        if phase == 'val':
           val_acc_history.append(epoch_acc)

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))

model.load_state_dict(best_model_wts)