nn.Parameter not learning

This is my Visual Question Answering Model

class VQAModel(nn.Module):
    def __init__(self, num_questions):
        super(VQAModel, self).__init__()

        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity()

        self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))

        self.fc = nn.Linear(2048 + 512, 1)

    def forward(self, image):
        image_features = self.cnn(image)
        batch_size = image.size(0)
        num_questions = self.question_embeddings.size(0)
        image_features_expanded = image_features.unsqueeze(1).repeat(1, num_questions, 1)

        combined_features = torch.cat((image_features_expanded, self.question_embeddings.unsqueeze(0).repeat(batch_size, 1, 1)), dim=2)  # Shape: (batch_size, num_questions, 2560)

        combined_features = combined_features.view(batch_size * num_questions, -1)
        output = self.fc(combined_features)
        output = output.view(batch_size, num_questions)

        return output

It runs, but no matter what I try, the nn.Parameter never gets updated in any epoch.

What would be a suitable solution for this?
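For reference, a minimal way to check that the parameter is registered and receives a gradient (a sketch, not my actual training loop; the num_questions value and the random input are placeholders):

model = VQAModel(num_questions=5)

# the embedding parameter must show up here, or the optimizer never sees it
print([name for name, _ in model.named_parameters() if 'question_embeddings' in name])

out = model(torch.randn(2, 3, 64, 64))
out.sum().backward()

# should print True after backward(); None would mean no gradient flows to it
print(model.question_embeddings.grad is not None)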

Hi Aniruth!

Works for me.

Here is a script that contains your model, together with some code to run it:

import torch
print (torch.__version__)
import torchvision
print (torchvision.__version__)

from torch import nn
from torchvision import models

_ = torch.manual_seed (2024)

class VQAModel(nn.Module):
    def __init__(self, num_questions):
        super(VQAModel, self).__init__()
        
        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity() 
        
        self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))
        
        self.fc = nn.Linear(2048 + 512, 1) 

    def forward(self, image):
        image_features = self.cnn(image)
        batch_size = image.size(0)
        num_questions = self.question_embeddings.size(0)
        image_features_expanded = image_features.unsqueeze(1).repeat(1, num_questions, 1)
        
        combined_features = torch.cat((image_features_expanded, self.question_embeddings.unsqueeze(0).repeat(batch_size, 1, 1)), dim=2)  # Shape: (batch_size, num_questions, 2560)
        
        combined_features = combined_features.view(batch_size * num_questions, -1) 
        output = self.fc(combined_features) 
        output = output.view(batch_size, num_questions)  
        
        return output

vqa = VQAModel (5)

opt = torch.optim.SGD (vqa.parameters(), lr = 0.1)

# snapshot the parameter; .clone() makes an independent copy rather than a view
embeddings_before = vqa.question_embeddings.detach().clone()

# one forward / backward / optimizer step on a random input
ima = torch.randn (1, 3, 64, 64)
resa = vqa (ima)
loss = resa.sum()
loss.backward()
print ('max grad:')
print ('vqa.question_embeddings.grad.abs().max():', vqa.question_embeddings.grad.abs().max())
opt.step()

embeddings_after = vqa.question_embeddings.detach().clone()

print ('max embeddings diff:')
print ('(embeddings_after - embeddings_before).abs().max():', (embeddings_after - embeddings_before).abs().max())

And here is its output:

2.3.1
0.18.1
<path_to_pytorch_install>\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
<path_to_pytorch_install>\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
max grad:
vqa.question_embeddings.grad.abs().max(): tensor(0.0198)
max embeddings diff:
(embeddings_after - embeddings_before).abs().max(): tensor(0.0020)

As you can see, a single optimizer step modifies the parameter defined by:

self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))

The numbers are also consistent: plain SGD updates each element by lr * grad, and 0.1 * 0.0198 ≈ 0.0020, which matches the printed diff.
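Note, by the way, that the .clone() in the snapshots matters: .detach() by itself returns a view that shares storage with the parameter, so an in-place optimizer step would also change the "before" snapshot and the diff would come out as zero, which could easily be mistaken for the parameter not learning. A small illustration:

p = torch.nn.Parameter (torch.ones (3))
alias = p.detach()             # shares storage with p
snapshot = p.detach().clone()  # independent copy

with torch.no_grad():
    p += 1.0                   # stand-in for an in-place optimizer step

print (alias)     # tensor([2., 2., 2.]), followed the update
print (snapshot)  # tensor([1., 1., 1.]), the true "before" value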

Please post a simplified, fully-self-contained, runnable script that illustrates
your issue, together with the output you get when you run it.

Best.

K. Frank


OK, thank you @KFrank. Will check it out.