I built a small model:
import torch
from torch import nn

class TinyVGG(nn.Module):
    def __init__(self, input_features,
                 output_features,
                 hidden_units,
                 len_classes) -> None:
        super().__init__()
        self.conv_relu_layer_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_features,
                      out_channels=output_features,
                      kernel_size=3,
                      padding=1,
                      stride=1),
            nn.ReLU()
        )
        self.conv_relu_maxpool_1 = nn.Sequential(
            nn.Conv2d(in_channels=output_features,
                      out_channels=output_features,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3)
        )
        self.conv_relu_layer_2 = nn.Sequential(
            nn.Conv2d(in_channels=output_features,
                      out_channels=output_features,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU()
        )
        self.conv_relu_maxpool_2 = nn.Sequential(
            nn.Conv2d(in_channels=output_features,
                      out_channels=output_features,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3)
        )
        self.classfier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*3*3,
                      out_features=len_classes)
        )

    def forward(self, x):
        x = self.conv_relu_layer_1(x)
        x = self.conv_relu_maxpool_1(x)
        x = self.conv_relu_layer_2(x)
        x = self.conv_relu_maxpool_2(x)
        x = self.classfier(x)
        return x
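For context, the data and the model instance are set up roughly like this. I'm reconstructing this part from memory, so the dataset/loader details and the parameter values below are placeholders rather than my exact code:

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = "cuda" if torch.cuda.is_available() else "cpu"

# Placeholder data setup: MNIST converted to tensors
train_data = datasets.MNIST(root="data", train=True, download=True,
                            transform=transforms.ToTensor())
test_data = datasets.MNIST(root="data", train=False, download=True,
                           transform=transforms.ToTensor())

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

# Placeholder hyperparameters: 1 input channel, 10 conv channels, 10 classes
tinyvgg_GPU = TinyVGG(input_features=1,
                      output_features=10,
                      hidden_units=10,
                      len_classes=10).to(device)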
I tried to train it with:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=tinyvgg_GPU.parameters(),
                             lr=0.01)

epochs = 5
for epoch in range(epochs):
    accuracy=0; loss=0
    for data, target in train_dataloader:
        tinyvgg_GPU.train()
        data = data.to(device)
        target = target.to(device)
        y_logits = tinyvgg_GPU(data)
        loss += loss_fn(y_logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss = loss/len(train_dataloader)

    tinyvgg_GPU.eval()
    with torch.inference_mode():
        test_loss = 0; test_accuracy = 0
        for data, target in test_dataloader:
            data = data.to(device)
            target = target.to(device)
            test_logits = tinyvgg_GPU(data)
            test_loss += loss_fn(test_logits, target)
        test_loss = test_loss/len(test_dataloader)
    print(f"Epoch: {epoch} | loss: {loss} | test_loss : {test_loss}")
Running this threw:
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
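One difference I can see from the reference loop below is that I accumulate into loss with += inside the batch loop before calling backward(). Here is a tiny made-up sketch (toy linear model and random data, nothing to do with my actual code) of what I think that pattern boils down to, though I'm not sure whether this is really what's going on:

import torch

# Toy model and data, purely for illustration
x = torch.randn(8, 3)
y = torch.randn(8, 1)
model = torch.nn.Linear(3, 1)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

loss = 0
for step in range(2):
    # Running total: after the first step, `loss` still holds the first step's graph
    loss += loss_fn(model(x), y)
    optimizer.zero_grad()
    # The second step's backward() would also walk the first step's graph,
    # whose buffers were already freed by the first backward()
    loss.backward()
    optimizer.step()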
I found a reference model online:
from torch import nn

class MNIST_model(torch.nn.Module):
    """Model capable of predicting on MNIST dataset."""

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*7*7,
                      out_features=output_shape)
        )

    def forward(self, x):
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.classifier(x)
        return x
and the code to train it:
# %%time
from tqdm.auto import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

# Train on GPU
model_gpu = MNIST_model(input_shape=1,
                        hidden_units=10,
                        output_shape=10).to(device)

# Create a loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_gpu.parameters(), lr=0.1)

# Training loop
epochs = 5
for epoch in tqdm(range(epochs)):
    train_loss = 0
    model_gpu.train()
    for batch, (X, y) in enumerate(train_dataloader):
        # Put data on target device
        X, y = X.to(device), y.to(device)
        # Forward pass
        y_pred = model_gpu(X)
        # Loss calculation
        loss = loss_fn(y_pred, y)
        train_loss += loss
        # Optimizer zero grad
        optimizer.zero_grad()
        # Loss backward
        loss.backward()
        # Step the optimizer
        optimizer.step()
    # Adjust train loss to number of batches
    train_loss /= len(train_dataloader)

    ### Testing loop
    test_loss_total = 0
    # Put model in eval mode and turn on inference mode
    model_gpu.eval()
    with torch.inference_mode():
        for batch, (X_test, y_test) in enumerate(test_dataloader):
            # Make sure test data on target device
            X_test, y_test = X_test.to(device), y_test.to(device)
            test_pred = model_gpu(X_test)
            test_loss = loss_fn(test_pred, y_test)
            test_loss_total += test_loss
        # Adjust test loss total for number of batches
        test_loss_total /= len(test_dataloader)

    # Print out what's happening
    print(f"Epoch: {epoch} | Loss: {train_loss:.3f} | Test loss: {test_loss_total:.3f}")
The reference model trains without any problem, and I don't understand what's different about mine. Can someone please explain? Thanks in advance.