Parameter not learning

I made a 2 step model with a u net and a gan as 2 consecutive steps.

the output that i get from the u net , i apply thresholding to get a mask , and pass the output and mask to the gan for inpainting.
i want to make the threshold also learnable .
i kept the threshold as nn.Parameter() , and also set required_grad = True , but then when I checkked while training the model , the parameter value is not getting updated at all.

The same init value of 0.5 is only coming.
Code

class Combined_Model(nn.Module):

def __init__(self , options):
    super(Combined_Model, self).__init__()

    self.pretrained_state_dict = torch.load(os.path.join(options.pretrained, 'G0000000.pt'), map_location=torch.device('cuda'))
    self.unet = UNet().to(options.device)


    if options.with_prompts:
        self.inpainter = Prompted_InpaintGenerator(options)
        self.org_gan = InpaintGenerator(options)
        #self.inpainter.load_state_dict(load_pretrained_weights(self.org_gan, self.pretrained_state_dict), strict=False)
        self.inpainter.load_state_dict(load_pretrained_weights(self.org_gan , self.inpainter) , strict=True)
    else:
        self.inpainter = InpaintGenerator(options)
        self.inpainter.load_state_dict(torch.load(os.path.join(options.pretrained, 'G0000000.pt'), map_location=options.device), strict=False)

    self.models = [self.unet, self.inpainter]

    self.learnable_threshold = nn.Parameter(torch.tensor(0.5), requires_grad=True)

def forward(self , x):

    unet_output = self.unet(x)
    unet_output_gray = tensor_to_cv2_gray(unet_output)
    flary_img_gray = tensor_to_cv2_gray(x)
    print(self.learnable_threshold)
    difference = (torch.from_numpy(flary_img_gray) - torch.from_numpy(unet_output_gray))
    #difference_tensor = torch.tensor(difference, dtype=torch.float32).to(options.device)
    difference_tensor = difference.clone().to(options.device)
    binary_mask = torch.where(difference_tensor > self.learnable_threshold, torch.tensor(1.0).to(options.device), torch.tensor(0.0).to(options.device))
    binary_mask = binary_mask.unsqueeze(1)

    inpainted_output = self.inpainter(unet_output , binary_mask)

    return inpainted_output

Hi @Aniruth_Sundararaja1,

When you re-wrap the output tensor from your Unet, you destroy the history of operations and hence don’t have a gradient. Make sure to not move your tensor from torch to numpy (and back again to torch).

Also, you should just be able to use binary_mask = torch.where(difference_tensor > self.learnable_threshold, 1, 0) (without creating new tensor), as scalars can easily broadcast across your multi-dimensional tensor.

Thanks @AlphaBetaGamma96

I tried out the solution as you said , but my param is still not changing
I am attaching my updated code as you has told for reference

def tensor_to_cv2_gray(tensor):
tensor = tensor.detach().cpu()
tensor = tensor.permute(0, 2, 3, 1)

r_factor = torch.tensor(0.2989, dtype=tensor.dtype, device=tensor.device)
g_factor = torch.tensor(0.5870, dtype=tensor.dtype, device=tensor.device)
b_factor = torch.tensor(0.1140, dtype=tensor.dtype, device=tensor.device)

gray_tensor = tensor[..., 0] * r_factor + tensor[..., 1] * g_factor + tensor[..., 2] * b_factor

gray_tensor = gray_tensor.unsqueeze(1).permute(0, 3, 1, 2)

return gray_tensor

DataLoader

train_dataset, train_dataloader = training_dataloader(options.batch_size, options.no_of_workers, options.train_dataset_path, options.image_size)

Models

def load_pretrained_weights(original_model, new_model):
orig_ = {}
new_ = {}
for (name, param) in original_model.named_parameters():
orig_[name] = param
for (name, param) in new_model.named_parameters():
if name in orig_:
new_[name] = orig_[name]
else:
new_[name] = torch.randn_like(param) / torch.norm(torch.randn_like(param))
return new_

class Combined_Model(nn.Module):

def __init__(self , options):
    super(Combined_Model, self).__init__()

    self.pretrained_state_dict = torch.load(os.path.join(options.pretrained, 'G0000000.pt'), map_location=torch.device('cuda'))
    self.unet = UNet().to(options.device)


    if options.with_prompts:
        self.inpainter = Prompted_InpaintGenerator(options)
        self.org_gan = InpaintGenerator(options)
        #self.inpainter.load_state_dict(load_pretrained_weights(self.org_gan, self.pretrained_state_dict), strict=False)
        self.inpainter.load_state_dict(load_pretrained_weights(self.org_gan , self.inpainter) , strict=True)
    else:
        self.inpainter = InpaintGenerator(options)
        self.inpainter.load_state_dict(torch.load(os.path.join(options.pretrained, 'G0000000.pt'), map_location=options.device), strict=False)

    self.models = [self.unet, self.inpainter]

    #self.learnable_threshold = nn.Parameter(torch.tensor(0.5), requires_grad=True)
    self.learnable_threshold = nn.Parameter(torch.tensor(0.5), requires_grad=True)



def forward(self , x):

    unet_output = self.unet(x)
    unet_output_gray = tensor_to_cv2_gray(unet_output)
    flary_img_gray = tensor_to_cv2_gray(x)
    #    difference = (torch.from_numpy(flary_img_gray) - torch.from_numpy(unet_output_gray))
    difference = (flary_img_gray) - (unet_output_gray)
    #difference_tensor = torch.tensor(difference, dtype=torch.float32).to(options.device)
    difference_tensor = difference.clone().to(options.device)
    binary_mask = torch.where(difference_tensor > self.learnable_threshold, 1 , 0)
    #binary_mask = self.learnable_threshold_layer(difference_tensor, options.device)
    #binary_mask = binary_mask.unsqueeze(1)
    binary_mask = binary_mask.permute(0 , 2 , 1 , 3)
    inpainted_output = self.inpainter(unet_output , binary_mask)

    return inpainted_output

For your reference , this is the output i get
0%| | 0/100 [00:00<?, ?it/s]torch.Size([2, 3, 256, 256]) torch.Size([2, 1, 256, 256])
Epoch Loss : 0.00016948020493146032
Learnable Threshold: 0.5
1%|▉ | 1/100 [02:40<4:25:12, 160.73s/it]torch.Size([2, 3, 256, 256]) torch.Size([2, 1, 256, 256])
Epoch Loss : 0.000250783225055784
Learnable Threshold: 0.5

Could you please look and tell , where i am going wrong

Update

when i tried checking in model names parameters , i could not find the learnble threshold parameter