Let's try using `torch.profiler` to profile ResNet18:
import torch
import torch.profiler
import torchvision
# ResNet-18 built by torchvision's constructor, then moved to the first CUDA device.
rn18 = torchvision.models.resnet18()
rn18 = rn18.to("cuda:0")

# Keyword arguments for the forward pass: one random batch of shape
# (16, 3, 224, 224), created on the host and then copied to the same device.
_batch = torch.randn(16, 3, 224, 224)
rn18_params = dict(x=_batch.to("cuda:0"))
def run_model_profiler(callback, dict_params, wait=1, warmup=2, active=5, row_limit=15):
    """Profile repeated calls of ``callback`` and print a summary table.

    ``callback`` is invoked ``wait + warmup + active`` times with
    ``**dict_params`` inside a ``torch.profiler.profile`` context, and
    ``prof.step()`` is called after every invocation so the profiler schedule
    advances. Once the context has exited, the averaged events are printed,
    sorted by ``self_cuda_time_total``.

    Args:
        callback: Callable to profile; called as ``callback(**dict_params)``.
        dict_params: Mapping of keyword arguments forwarded to ``callback``.
        wait: Passed to ``torch.profiler.schedule`` as ``wait``.
        warmup: Passed to ``torch.profiler.schedule`` as ``warmup``.
        active: Passed to ``torch.profiler.schedule`` as ``active``.
        row_limit: Maximum number of rows in the printed table.

    Returns:
        None. The table is written to standard output.
    """
    # Run exactly one full wait -> warmup -> active cycle of the schedule.
    total_steps = wait + warmup + active

    with torch.profiler.profile(
        schedule=torch.profiler.schedule(wait=wait, warmup=warmup, active=active),
        on_trace_ready=None,
        profile_memory=False,
        record_shapes=False,
        with_stack=False,
        with_flops=True,
    ) as prof:
        for _ in range(total_steps):
            # The return value is irrelevant here; only the executed ops matter.
            callback(**dict_params)
            # Mark the end of one iteration so the schedule moves forward.
            prof.step()

    # Report only after the profiler context has been closed.
    print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=row_limit))
# Pass the module itself rather than ``rn18.forward``: calling the module goes
# through ``__call__``, which also runs any registered hooks, whereas calling
# ``forward`` directly skips them.
run_model_profiler(rn18, rn18_params)
- Why doesn't the sum of all values in the `Self CUDA %` column equal 100? It is much more than 100.
- How can I remove `ProfilerStep*` from the result? I don't want the result to contain it, or for it to affect the percentage statistics.