Hello all,
I have read many threads about ways to free GPU memory, and I wrote a simple example to test my code. I believe I'm still missing something, but I can't figure out what it is.
To my knowledge, I moved all of the tensors to the CPU and deleted them, which I thought should free the memory.
I created a loop in which every epoch clears the GPU memory, then initializes a new model and trains it. I expected that if I stop in debug mode right after the "wipe_memory" call, I would see that the GPU memory is free.
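For reference, here is a minimal sketch (separate from my training script below, with a placeholder tensor standing in for the model and optimizer state) of how the relevant numbers can be inspected: torch.cuda.memory_allocated() reports the bytes held by live tensors, while torch.cuda.memory_reserved() also includes PyTorch's allocator cache, which normally only shrinks after torch.cuda.empty_cache().

import gc
import torch

def report_gpu_memory(tag):
    # Live-tensor bytes vs. bytes kept by the caching allocator
    allocated = torch.cuda.memory_allocated() / 1024**2
    reserved = torch.cuda.memory_reserved() / 1024**2
    print(f"{tag}: allocated={allocated:.1f} MiB, reserved={reserved:.1f} MiB")

if torch.cuda.is_available():
    report_gpu_memory("start")
    x = torch.randn(4096, 4096, device="cuda")  # placeholder for model/optimizer tensors
    report_gpu_memory("after allocation")
    del x
    gc.collect()
    report_gpu_memory("after del + gc.collect()")  # allocated drops, reserved usually stays
    torch.cuda.empty_cache()
    report_gpu_memory("after empty_cache()")  # reserved drops as well

Here is the full example: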
import gc
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
class Fitter:
    def __init__(self, loss_fn):
        self.model = self.init_model()
        self.optimizer = self.init_optimizer()
        self.loss_fn = loss_fn

    def init_model(self):
        return NeuralNetwork()

    def init_optimizer(self):
        return torch.optim.SGD(self.model.parameters(), lr=1e-3)

    def wipe_memory(self):  # DOES WORK
        self._optimizer_to(torch.device('cpu'))
        self._model_to(torch.device('cpu'))
        del self.model
        del self.optimizer
        gc.collect()
        torch.cuda.empty_cache()
    @staticmethod
    def _param_to_device(param, device):
        # Not sure there are any global tensors in the state dict
        if isinstance(param, torch.Tensor):
            param.data = param.data.to(device)
            if param._grad is not None:
                param._grad.data = param._grad.data.to(device)
        elif isinstance(param, dict):
            for subparam in param.values():
                if isinstance(subparam, torch.Tensor):
                    subparam.data = subparam.data.to(device)
                    if subparam._grad is not None:
                        subparam._grad.data = subparam._grad.data.to(device)

    def _optimizer_to(self, device):
        if self.optimizer.state is not None:
            for param in self.optimizer.state.values():
                self._param_to_device(param, device)

    def _model_to(self, device):
        if self.model.state_dict is not None:
            for param in self.model.state_dict().values():
                self._param_to_device(param, device)
    def train(self, dataloader, device):
        print(torch.cuda.memory_summary())
        self.wipe_memory()
        #### at this stage I should see that the memory is free, no? ####
        self.model = self.init_model()
        self.optimizer = self.init_optimizer()
        self.model.to(device)
        self.model.train()
        size = len(dataloader.dataset)
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
            # Compute prediction error
            pred = self.model(X)
            loss = self.loss_fn(pred, y)
            # Backpropagation
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            if batch % 100 == 0:
                loss, current = loss.item(), (batch + 1) * len(X)
                print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
if __name__ == '__main__':
    # Download training data from open datasets.
    training_data = datasets.FashionMNIST(
        root="data",
        train=True,
        download=True,
        transform=ToTensor(),
    )
    # Download test data from open datasets.
    test_data = datasets.FashionMNIST(
        root="data",
        train=False,
        download=True,
        transform=ToTensor(),
    )
    batch_size = 64
    # Create data loaders.
    train_dataloader = DataLoader(training_data, batch_size=batch_size)
    test_dataloader = DataLoader(test_data, batch_size=batch_size)
    for X, y in test_dataloader:
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        break
    loss_fn = nn.CrossEntropyLoss()
    fitter = Fitter(loss_fn)
    device = 'cuda:0'
    epochs = 1000
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        fitter.train(dataloader=train_dataloader, device=device)
    print("Done!")
Your help is much appreciated!