Hi,
I’m fairly new to PyTorch so this will probably seem like a silly question, but here we go: I’m curious about the expected throughput of CPU inference while using the various modes of PyTorch. Using one of the pretrained models, I benchmarked it on an 8-core Ryzen machine with the script below, but I’m seeing times that seem rather slow (about 2.4 seconds for a batch size of 16, plus or minus a bit depending on eager/trace/script mode). Could someone tell me what I’m doing wrong? Much appreciated.
import torchvision
import torch
import pprint
import timeit
from timeit import default_timer as timer
def benchmark_pytorch_cpu(repeat_runs, model, example):
    """Time `repeat_runs` forward passes of `model` on `example` (CPU).

    Args:
        repeat_runs: number of timed forward passes.
        model: any callable module; invoked as `model(example)`.
        example: input tensor fed to the model on every run.

    Returns:
        List of per-run wall-clock durations in seconds (len == repeat_runs).
    """
    results = []
    # Fix vs. original: run the warmup under no_grad too. The original
    # warmed up with autograd enabled, which builds the backward graph
    # (slower, more memory) and is not representative of inference.
    # Hoisting the context manager also keeps its entry/exit out of the
    # timed region.
    with torch.no_grad():
        # Warmup: let lazy initialization / JIT profiling settle.
        for _ in range(3):
            model(example)
        for _ in range(repeat_runs):
            start = timer()
            model(example)
            end = timer()
            results.append(end - start)
    return results
def pytorch_eager_mode(config):
    """Benchmark pretrained ResNet-50 inference in plain (eager) mode.

    `config` must provide 'repeat_runs' and 'batch_size'; returns the
    list of per-run timings from benchmark_pytorch_cpu.
    """
    batch = torch.rand(config['batch_size'], 3, 224, 224)
    net = torchvision.models.resnet50(pretrained=True)
    net.eval()
    return benchmark_pytorch_cpu(config['repeat_runs'], net, batch)
def pytorch_script_mode(config):
    """Benchmark pretrained ResNet-50 compiled with torch.jit.script.

    Fixes vs. original:
    - `torch.jit.script` takes no example input; its second positional
      parameter is the (deprecated) `optimize` flag, so passing the
      example tensor there was incorrect.
    - `model.eval()` is now called BEFORE compilation, so inference-mode
      behavior (batchnorm running stats, dropout disabled) is what gets
      compiled.

    `config` must provide 'repeat_runs' and 'batch_size'; returns the
    list of per-run timings from benchmark_pytorch_cpu.
    """
    repeat_runs = config['repeat_runs']
    example = torch.rand(config['batch_size'], 3, 224, 224)
    model = torchvision.models.resnet50(pretrained=True)
    model.eval()
    with torch.jit.optimized_execution(True):
        model = torch.jit.script(model)
    return benchmark_pytorch_cpu(repeat_runs, model, example)
def pytorch_trace_mode(config):
    """Benchmark pretrained ResNet-50 compiled with torch.jit.trace.

    Fix vs. original: `model.eval()` must be called BEFORE tracing.
    Tracing records the model's current behavior, so tracing in training
    mode bakes in batchnorm batch statistics and active dropout; calling
    eval() on the resulting ScriptModule afterwards does not undo that.

    `config` must provide 'repeat_runs' and 'batch_size'; returns the
    list of per-run timings from benchmark_pytorch_cpu.
    """
    repeat_runs = config['repeat_runs']
    example = torch.rand(config['batch_size'], 3, 224, 224)
    model = torchvision.models.resnet50(pretrained=True)
    model.eval()
    with torch.jit.optimized_execution(True):
        model = torch.jit.trace(model, example)
    return benchmark_pytorch_cpu(repeat_runs, model, example)
# Entry point: run all three benchmark modes and pretty-print the raw
# per-run timings. Guarded so that importing this module does not kick
# off the (slow) benchmarks as a side effect.
if __name__ == "__main__":
    config = {
        'repeat_runs': 30,
        'batch_size': 16,
        'mode': 'all',  # currently unused by the benchmark functions
    }
    result = {
        'eager_mode': pytorch_eager_mode(config),
        'trace_mode': pytorch_trace_mode(config),
        'script_mode': pytorch_script_mode(config),
    }
    pprint.pprint(result)