Hi, I’m building a model that freezes some layers after a number of training steps, but the weights that should be frozen are still being updated. Here’s a snippet that reproduces the issue: the printed self.conv1.weight keeps changing after step 5, even though conv1’s output is already detached.
import torch
from torch import nn


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 8, 3, padding=1)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(8, 2)
        self.loss = nn.CrossEntropyLoss(reduction="mean")

    def forward(self, x, y, step):
        # Train conv1 for the first 5 steps, then freeze it and train conv2.
        if step < 5:
            self.conv1.train()
            self.conv2.eval()
        else:
            self.conv1.eval()
            self.conv2.train()

        if step < 5:
            x = self.conv1(x)
        else:
            # conv1 should be frozen from here on: no gradients flow into it.
            with torch.no_grad():
                x = self.conv1(x).detach()

        x = self.conv2(x)
        x = self.pool(x)[:, :, 0, 0]
        x = self.linear(x)
        # This mean keeps changing after step 5, even though conv1 is detached.
        print(step, self.conv1.weight.mean())
        loss = self.loss(x, y)
        return loss


model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

for i in range(10):
    input_data = torch.rand(size=[2, 3, 16, 16])
    target_data = (torch.rand(size=[2]) * 2).long()
    loss = model(input_data, target_data, i)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
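
For what it’s worth, I suspect the optimizer rather than autograd: with momentum and weight decay, SGD can keep moving a parameter even when its gradient is exactly zero, because weight decay adds weight_decay * p to the gradient and the momentum buffer keeps replaying the last update direction. Here’s a minimal standalone sketch (hypothetical, separate from my model above) that seems to show this:

import torch

p = torch.nn.Parameter(torch.ones(1))
opt = torch.optim.SGD([p], lr=0.1, momentum=0.9, weight_decay=1e-4)

# One real gradient step populates the momentum buffer.
p.grad = torch.ones(1)
opt.step()

# From here on the gradient is exactly zero, yet p keeps moving.
p.grad = torch.zeros(1)
for _ in range(3):
    opt.step()
    print(p.item())

If the same thing is happening in my model, then after step 5 conv1’s gradients stay at the zeros left by optimizer.zero_grad() (at least on versions where zero_grad() zeroes the grads rather than setting them to None), and its momentum buffer plus weight decay would still move the weights. I’m not certain this is the full story, though.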