For some reason, the code below seems to have a memory leak. I can’t figure out the exact cause, but it only occurs when using the L-BFGS optimizer.
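For anyone who wants to reproduce this, one way to watch the growth is a small helper along these lines (report_memory is just a name I'm making up here; it's not part of the script below):

import torch

def report_memory(device: torch.device, tag: str) -> None:
    # Print current and peak CUDA allocator usage in MiB; does nothing on CPU.
    if device.type == "cuda":
        allocated = torch.cuda.memory_allocated(device) / 2**20
        peak = torch.cuda.max_memory_allocated(device) / 2**20
        print(f"{tag}: allocated={allocated:.1f} MiB, peak={peak:.1f} MiB")

Calling it once per epoch should make any GPU-side growth visible.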
import torch
from typing import Tuple

class SampleDataset(torch.utils.data.Dataset):
    """Simple dataset for collected samples."""

    def __init__(self, data: torch.Tensor, labels: torch.Tensor) -> None:
        # Clone each row so the dataset owns independent tensors rather than
        # views into the original batch.
        self.data = [data[i].clone() for i in range(data.shape[0])]
        self.labels = [labels[i].clone() for i in range(labels.shape[0])]

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        return self.data[idx], self.labels[idx]

    def __len__(self) -> int:
        return len(self.data)

def train_model(model: torch.nn.Module, dataloader, device: torch.device, num_epochs: int = 5):
    criterion = torch.nn.CrossEntropyLoss()
    # Negative tolerances disable L-BFGS's early-termination checks, and
    # max_iter=1 means each optimizer.step(closure) runs a single inner iteration.
    optimizer = torch.optim.LBFGS(
        model.parameters(), lr=1.0, max_iter=1, tolerance_change=-1, tolerance_grad=-1
    )
    for n in range(num_epochs):
        print("Epoch", n + 1)
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            def closure() -> torch.Tensor:
                # L-BFGS may call this multiple times to re-evaluate the loss.
                optimizer.zero_grad()
                output = model(inputs)
                loss = criterion(output, labels)
                loss.backward()
                return loss

            optimizer.step(closure)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 363 synthetic activation maps: 121 classes, 3 samples per class.
activation_samples = torch.randn(363, 2560, 9, 9)
activation_labels = torch.as_tensor(list(range(121)) * 3)

# Flatten each sample to a vector of 2560 * 9 * 9 = 207360 features.
sample_data = activation_samples.reshape(activation_samples.shape[0], -1).double()

# Set up dataset and dataloader
sample_dataset = SampleDataset(sample_data.cpu(), activation_labels.cpu())
dataloader = torch.utils.data.DataLoader(
    sample_dataset, batch_size=8, num_workers=0, shuffle=True
)

model = torch.nn.Linear(sample_data.shape[1], 121, bias=False).to(device).double()
model = model.train()
train_model(model, dataloader, device)