Given the code fragment below, I freeze the weights of the neural network using `model.fc1.requires_grad_(False)` and `model.fc2.requires_grad_(False)`.
If I use `loss = criterion(outputs, y_batch)` to calculate the loss, the training runs fine. Why does it not raise the error `RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn`?
I think it should cause the error since all layers of the neural network have been frozen.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Define a simple neural network
class SimpleNet(nn.Module):
    """Two stacked linear layers with no activation in between.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of features produced by the first layer.
        output_size: Number of features produced by the second layer.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pass ``x`` through ``fc1`` and then ``fc2`` and return the result."""
        x = self.fc1(x)
        x = self.fc2(x)
        return x
# Layer widths: input, hidden and output all have three features.
input_size = hidden_size = output_size = 3

# Training hyper-parameters.
batch_size, num_epochs = 16, 1
learning_rate = 0.01
num_samples = 1000

# Random inputs, explicitly marked as requiring gradients. Autograd therefore
# tracks every operation that consumes them, independently of whether the
# model's own parameters require gradients.
X_train = torch.randn(num_samples, input_size).requires_grad_(True)
# Targets are all ones, with one value per output feature of the model.
y_train = torch.ones(num_samples, output_size)

# Serve the (input, target) pairs in shuffled mini-batches.
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# Model, loss function and plain SGD over all of the model's parameters.
model = SimpleNet(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# Train for `num_epochs` passes over `dataloader`, printing the loss's
# grad_fn for every batch and the loss value every tenth batch.
for epoch in range(num_epochs):
    total_loss = 0  # never updated in the visible code
    for batch_idx, (X_batch, y_batch) in enumerate(dataloader):
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        # Public accessor for the autograd node that produced `loss`; it is
        # None when the loss is not attached to any graph.
        print(loss.grad_fn)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 10 == 0:
            print(f'Batch [{batch_idx + 1}/{len(dataloader)}], Loss: {loss.item():.4f}')
        # NOTE(review): the original indentation was lost. The freeze is
        # placed at the end of the batch loop because, with num_epochs = 1,
        # that is the only position where it affects later iterations --
        # confirm against the intended script.
        # Freezing both layers does not make backward() fail on later
        # batches: the batches come from X_train, which was created with
        # requires_grad=True, so the loss is still attached to a graph.
        model.fc1.requires_grad_(False)
        model.fc2.requires_grad_(False)
print("Training complete!")