Hi, I’m trying to record CUDA GPU memory usage using the API torch.cuda.memory_allocated. My goal is to draw a diagram of GPU memory usage (in MB) during the forward pass.
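For context, the basic idea of how I sample this counter looks roughly like the sketch below (a simplified, self-contained example; the toy layer and tensor shapes are made up for illustration):

```python
import torch

# Hypothetical toy layer and input, just to illustrate the sampling idea.
layer = torch.nn.Conv2d(3, 64, kernel_size=3, padding='same').cuda()
x = torch.randn(8, 3, 224, 224, device='cuda')

print(torch.cuda.memory_allocated() / 1e6, 'MB before forward')
y = layer(x)
print(torch.cuda.memory_allocated() / 1e6, 'MB after forward')
```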
This is the nn.Module class I’m using. It relies on the register_forward_hook method of nn.Module to report the memory usage after each submodule’s forward method is called (the corresponding register_forward_pre_hook registration is currently commented out):
```python
import torch
from torch.utils.checkpoint import checkpoint


class Segment(torch.nn.Module):
    def __init__(self,
                 specs_truncated,
                 in_channels,
                 use_checkpoint=False,
                 use_batch_norm=True,
                 ):
        super().__init__()
        self.subspecs = specs_truncated
        self.in_channels = in_channels
        self.use_checkpoint = use_checkpoint
        self.use_batch_norm = use_batch_norm
        self.seq_of_nodes = self._make_sequential_by_specs_truncated()
        self.handles_forward_hooks = self._register_forward_hooks(
            [torch.nn.Conv2d, torch.nn.BatchNorm2d, torch.nn.ReLU, torch.nn.MaxPool2d],
            # Hook factories: the outer lambda captures the module's name/class,
            # the inner lambda is the actual hook.
            lambda d: lambda m, i:
                print("Memory allocated before execute {}-{}: {} MB".format(
                    d['name'], d['class'],
                    torch.cuda.memory_allocated() / 1e6)),
            lambda d: lambda m, i, o:
                print("Memory allocated after execute {}-{}: {} MB".format(
                    d['name'], d['class'],
                    torch.cuda.memory_allocated() / 1e6)),
        )

    def forward(self, x):
        if self.use_checkpoint:
            return checkpoint(self.seq_of_nodes, x, use_reentrant=False)
        return self.seq_of_nodes(x)

    def _make_sequential_by_specs_truncated(self):
        layers = []
        in_channels = self.in_channels
        for node_spec in self.subspecs:
            if node_spec == 'M':
                layers += [
                    torch.nn.MaxPool2d(kernel_size=2),
                ]
            else:
                layers += [
                    torch.nn.Conv2d(in_channels, node_spec, kernel_size=3, padding='same'),
                ]
                # Slicing with the bool drops BatchNorm2d when use_batch_norm is False.
                layers += [
                    torch.nn.BatchNorm2d(node_spec),
                    torch.nn.ReLU(),
                ][not self.use_batch_norm:]
                in_channels = node_spec
        return torch.nn.Sequential(*layers)

    def _register_forward_hooks(self, list_targets, pre_hook, post_hook):
        handles = {}
        for name, module in self.seq_of_nodes.named_modules():
            if isinstance(module, tuple(list_targets)):
                # handles[name + '.pre'] = module.register_forward_pre_hook(
                #     pre_hook({'name': name, 'class': module.__class__.__name__}))
                handles[name + '.post'] = module.register_forward_hook(
                    post_hook({'name': name, 'class': module.__class__.__name__}))
        return handles

    def deregister_forward_hooks(self):
        for handle in self.handles_forward_hooks.values():
            handle.remove()
```
But I got a very unintuitive result, as described in the title:
```
Memory allocated after execute 0-Conv2d: 609.58208 MB
Memory allocated after execute 1-BatchNorm2d: 643.136512 MB
Memory allocated after execute 2-ReLU: 643.136512 MB
Memory allocated after execute 3-Conv2d: 643.136512 MB
Memory allocated after execute 4-BatchNorm2d: 643.136512 MB
Memory allocated after execute 5-ReLU: 643.136512 MB
Memory allocated after execute 6-MaxPool2d: 617.970688 MB
Memory allocated after execute 0-Conv2d: 601.193472 MB
Memory allocated after execute 1-BatchNorm2d: 617.970688 MB
Memory allocated after execute 2-ReLU: 617.970688 MB
Memory allocated after execute 3-Conv2d: 617.970688 MB
Memory allocated after execute 4-BatchNorm2d: 617.970688 MB
Memory allocated after execute 5-ReLU: 617.970688 MB
Memory allocated after execute 6-MaxPool2d: 605.912064 MB
Memory allocated after execute 0-Conv2d: 597.523456 MB
Memory allocated after execute 1-BatchNorm2d: 606.96064 MB
Memory allocated after execute 2-ReLU: 606.96064 MB
Memory allocated after execute 3-Conv2d: 606.96064 MB
Memory allocated after execute 4-BatchNorm2d: 606.96064 MB
Memory allocated after execute 5-ReLU: 606.96064 MB
Memory allocated after execute 6-Conv2d: 606.96064 MB
Memory allocated after execute 7-BatchNorm2d: 606.96064 MB
Memory allocated after execute 8-ReLU: 606.96064 MB
Memory allocated after execute 9-Conv2d: 606.96064 MB
Memory allocated after execute 10-BatchNorm2d: 606.96064 MB
Memory allocated after execute 11-ReLU: 606.96064 MB
Memory allocated after execute 12-MaxPool2d: 600.669184 MB
Memory allocated after execute 13-Conv2d: 595.426304 MB
Memory allocated after execute 14-BatchNorm2d: 597.523456 MB
Memory allocated after execute 15-ReLU: 597.523456 MB
Memory allocated after execute 16-Conv2d: 597.523456 MB
Memory allocated after execute 17-BatchNorm2d: 597.523456 MB
Memory allocated after execute 18-ReLU: 597.523456 MB
Memory allocated after execute 19-Conv2d: 597.523456 MB
Memory allocated after execute 20-BatchNorm2d: 597.523456 MB
Memory allocated after execute 21-ReLU: 597.523456 MB
Memory allocated after execute 22-Conv2d: 597.523456 MB
Memory allocated after execute 23-BatchNorm2d: 597.523456 MB
Memory allocated after execute 24-ReLU: 597.523456 MB
Memory allocated after execute 25-MaxPool2d: 594.377728 MB
Memory allocated after execute 26-Conv2d: 591.232 MB
Memory allocated after execute 27-BatchNorm2d: 591.232 MB
Memory allocated after execute 28-ReLU: 591.232 MB
Memory allocated after execute 29-Conv2d: 591.232 MB
Memory allocated after execute 30-BatchNorm2d: 591.232 MB
Memory allocated after execute 31-ReLU: 591.232 MB
Memory allocated after execute 32-Conv2d: 591.232 MB
Memory allocated after execute 33-BatchNorm2d: 591.232 MB
Memory allocated after execute 34-ReLU: 591.232 MB
Memory allocated after execute 35-Conv2d: 591.232 MB
Memory allocated after execute 36-BatchNorm2d: 591.232 MB
Memory allocated after execute 37-ReLU: 591.232 MB
```
As you can see, the memory usage reported by torch.cuda.memory_allocated shows that GPU usage was decreasing!
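For reference, the diagram I eventually want to draw from these samples would be produced with something like the sketch below (a minimal matplotlib example; collecting the values into a list instead of printing them is an assumption on my part, and the numbers are shortened for illustration):

```python
import matplotlib.pyplot as plt

# Hypothetical: suppose the hooks appended to this list instead of printing.
samples_mb = [609.58, 643.14, 643.14, 643.14, 617.97]

plt.plot(samples_mb, marker='o')
plt.xlabel('submodule index (forward order)')
plt.ylabel('torch.cuda.memory_allocated() (MB)')
plt.title('CUDA memory usage during forward')
plt.show()
```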
Could anyone help me identify whether I’m misusing any API in my code? Thanks!