This is my Visual Question Answering model:
import torch
from torch import nn
from torchvision import models

class VQAModel(nn.Module):
    def __init__(self, num_questions):
        super(VQAModel, self).__init__()
        # Pretrained ResNet-50 backbone; replacing its classifier head with
        # nn.Identity() makes it output 2048-dimensional image features.
        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity()
        # One learnable 512-dimensional embedding per question.
        self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))
        self.fc = nn.Linear(2048 + 512, 1)

    def forward(self, image):
        image_features = self.cnn(image)
        batch_size = image.size(0)
        num_questions = self.question_embeddings.size(0)
        # Pair every image with every question embedding.
        image_features_expanded = image_features.unsqueeze(1).repeat(1, num_questions, 1)
        combined_features = torch.cat(
            (image_features_expanded, self.question_embeddings.unsqueeze(0).repeat(batch_size, 1, 1)),
            dim=2,
        )  # Shape: (batch_size, num_questions, 2560)
        combined_features = combined_features.view(batch_size * num_questions, -1)
        output = self.fc(combined_features)
        output = output.view(batch_size, num_questions)
        return output
It runs, but no matter what I try, the nn.Parameter is not updated over any epoch.
What is a suitable solution for this?
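Roughly, this is how I am training it (num_epochs, dataloader and the BCE loss below are stand-ins for my actual pipeline, just to show where the optimizer comes in):

model = VQAModel(num_questions=5)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()

for epoch in range(num_epochs):                  # num_epochs: placeholder
    for images, targets in dataloader:           # targets: (batch_size, num_questions)
        optimizer.zero_grad()
        logits = model(images)                   # (batch_size, num_questions)
        loss = criterion(logits, targets.float())
        loss.backward()
        optimizer.step()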
KFrank
(K. Frank)
June 21, 2024, 12:50am
Hi Aniruth!
Aniruth_Sundararaja1:
class VQAModel(nn.Module):
    def __init__(self, num_questions):
        super(VQAModel, self).__init__()
        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity()
        self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))
        self.fc = nn.Linear(2048 + 512, 1)

    def forward(self, image):
        image_features = self.cnn(image)
        batch_size = image.size(0)
        num_questions = self.question_embeddings.size(0)
        image_features_expanded = image_features.unsqueeze(1).repeat(1, num_questions, 1)
        combined_features = torch.cat((image_features_expanded, self.question_embeddings.unsqueeze(0).repeat(batch_size, 1, 1)), dim=2)  # Shape: (batch_size, num_questions, 2560)
        combined_features = combined_features.view(batch_size * num_questions, -1)
        output = self.fc(combined_features)
        output = output.view(batch_size, num_questions)
        return output

It runs, but no matter what I try, the nn.Parameter is not updated over any epoch.
Works for me.
Here is a script that contains your model, together with some code to run it:
import torch
print (torch.__version__)
import torchvision
print (torchvision.__version__)

from torch import nn
from torchvision import models

_ = torch.manual_seed (2024)

class VQAModel(nn.Module):
    def __init__(self, num_questions):
        super(VQAModel, self).__init__()
        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity()
        self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))
        self.fc = nn.Linear(2048 + 512, 1)

    def forward(self, image):
        image_features = self.cnn(image)
        batch_size = image.size(0)
        num_questions = self.question_embeddings.size(0)
        image_features_expanded = image_features.unsqueeze(1).repeat(1, num_questions, 1)
        combined_features = torch.cat((image_features_expanded, self.question_embeddings.unsqueeze(0).repeat(batch_size, 1, 1)), dim=2)  # Shape: (batch_size, num_questions, 2560)
        combined_features = combined_features.view(batch_size * num_questions, -1)
        output = self.fc(combined_features)
        output = output.view(batch_size, num_questions)
        return output

vqa = VQAModel (5)
opt = torch.optim.SGD (vqa.parameters(), lr = 0.1)

embeddings_before = vqa.question_embeddings.detach().clone()

ima = torch.randn (1, 3, 64, 64)
resa = vqa (ima)

loss = resa.sum()
loss.backward()

print ('max grad:')
print ('vqa.question_embeddings.grad.abs().max():', vqa.question_embeddings.grad.abs().max())

opt.step()
embeddings_after = vqa.question_embeddings.detach().clone()

print ('max embeddings diff:')
print ('(embeddings_after - embeddings_before).abs().max():', (embeddings_after - embeddings_before).abs().max())
And here is its output:
2.3.1
0.18.1
<path_to_pytorch_install>\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
<path_to_pytorch_install>\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
max grad:
vqa.question_embeddings.grad.abs().max(): tensor(0.0198)
max embeddings diff:
(embeddings_after - embeddings_before).abs().max(): tensor(0.0020)
As you can see, a single optimizer step does modify the parameter defined by:
self.question_embeddings = nn.Parameter(torch.randn(num_questions, 512))
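Since the parameter updates correctly in that standalone script, my guess (only a guess, as your training code isn't shown) is that your training loop never connects the parameter to the optimizer, or cuts the gradient path to it (for example, an optimizer built from a different model instance, a forward pass under torch.no_grad(), or a .detach() on the embeddings). Here is a quick check you can paste into your own loop right after loss.backward(); model and optimizer stand for whatever objects your loop actually uses:

# 'model' and 'optimizer' are placeholders for the objects in your own loop.
registered = any (p is model.question_embeddings
                  for group in optimizer.param_groups
                  for p in group['params'])
print ('question_embeddings registered with optimizer:', registered)   # should be True
grad = model.question_embeddings.grad
print ('grad after backward():', None if grad is None else grad.abs().max())   # should not be None (or all zero)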
Please post a simplified, fully-self-contained, runnable script that illustrates
your issue, together with the output you get when you run it.
Best.
K. Frank
Ok, thank you @KFrank, I will check it out.