Hi,
What you said about requires_grad = False is true, and as @Isaac_Kargar mentioned, you should not forget to put the model in eval mode as well. Putting the model in eval mode does not affect the autograd engine; it only switches layers like dropout or batchnorm to their evaluation behavior, since their training-time behavior is not desired when extracting features. (see this post)
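A quick sketch of that difference (not from your code; the module and tensor names here are just for illustration):

import torch
import torch.nn as nn

m = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5), nn.BatchNorm1d(4))

m.eval()  # dropout becomes identity, batchnorm uses running statistics
out = m(torch.randn(2, 4))
print(out.requires_grad)  # True -> eval() alone does not stop gradient tracking

for p in m.parameters():
    p.requires_grad = False  # now autograd no longer tracks these weights
out = m(torch.randn(2, 4))
print(out.requires_grad)  # False -> nothing in the graph requires grad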
Here is a snippet that uses VGG16 with batch norm as the feature extractor (model2 in your case) and a simple one-layer linear model (model1 in your case). It works just fine, even though the model itself is complete nonsense; I kept the code simple just to demonstrate the idea.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import vgg16_bn
from torch import optim

# a model that gets another model as feature_extractor
class Model(nn.Module):
    def __init__(self, feature_extractor):
        super(Model, self).__init__()
        self.feature_extractor = feature_extractor  # model2 in your case
        self.layer = nn.Linear(3 * 256 * 256, 1000)  # model1 in your case

    def forward(self, x):
        features = self.feature_extractor(x)  # frozen model2 forward pass
        x = x.view(x.size(0), -1)  # flatten the input for the linear layer
        x = self.layer(x)
        x += features  # using features from model2
        return x

# freeze model2: eval mode for dropout/batchnorm, requires_grad=False for autograd
model2 = vgg16_bn(pretrained=True).eval()
for param in model2.parameters():
    param.requires_grad = False

model1 = Model(feature_extractor=model2)
model1.train()

criterion = nn.L1Loss()
# note: model1.parameters() also yields the frozen model2 parameters, but since their
# requires_grad is False they never receive a .grad and the optimizer skips them
optimizer = optim.SGD(model1.parameters(), lr=0.001, momentum=0.9)

running_loss = 0.0
for i in range(3):
    x = torch.randn(5, 3, 256, 256)  # consider this as inputs that change every batch
    optimizer.zero_grad()
    with torch.set_grad_enabled(True):  # grad is on by default; kept here for clarity
        outputs = model1(x)
        loss = criterion(outputs, torch.ones(outputs.shape))  # a weird loss!
        loss.backward()
        optimizer.step()
    running_loss += loss.item() * x.size(0)
print(running_loss)
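If you want to be explicit about which parameters get updated, you can also filter them yourself before building the optimizer; this is just an optional variant of the line above, not something you have to do:

trainable = [p for p in model1.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable, lr=0.001, momentum=0.9)
print(sum(p.numel() for p in trainable))  # only the nn.Linear weight and bias remain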
Here is an elaborate post about using pretrained models as feature extractors.
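For example, a common pattern from that kind of post (sketched here assuming torchvision's vgg16_bn layout, not taken from your code) is to keep only the convolutional trunk and drop the classifier, so you get spatial feature maps instead of 1000-class logits:

backbone = vgg16_bn(pretrained=True).features.eval()  # convolutional layers only
for p in backbone.parameters():
    p.requires_grad = False
feats = backbone(torch.randn(1, 3, 256, 256))
print(feats.shape)  # torch.Size([1, 512, 8, 8])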
Best