I use PyTorch to evaluate the run time of VGG16 and MobileNetV2, using a batch size of 1.
When the batch size is 1, my VGG16 result is Avg time: 0.0051 s (197.2 fps).
The MobileNetV2 result is Avg time: 0.0063 s (157.8 fps).
I wonder why MobileNetV2 is slower than VGG16 at batch size 1.
If the batch size equals 12, the VGG16 result is Avg time: 0.0349 s (28.7 fps),
while MobileNetV2 is Avg time: 0.0064 s (156.7 fps).
Is something wrong with my code? Why is VGG16 so much faster, or MobileNetV2 so slow? Or is a batch size of 1 unsuitable for evaluating run time?
Thanks.
My environment
Python 3.6.10
Pytorch 1.4.0
torchvision 0.5.0
Cuda 10.0.130
cuDNN 7603
GPU RTX2080Ti
CPU Intel® Xeon® CPU E3-1231 v3 @ 3.40GHz
This is my code.
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np
import cv2
from tensorboardX import SummaryWriter
import argparse
from torchsummaryX import summary
import time
import warnings
warnings.filterwarnings("ignore")
def time_cal2(m, batchsize=1, shape=(224, 224), warmup=10, iters=100, device=None):
    """Benchmark a model's forward pass and return the average seconds per batch.

    Args:
        m: the model to benchmark; the caller is responsible for placing it
            on ``device`` beforehand.
        batchsize: number of images per forward pass (default 1, as in the
            original script).
        shape: (height, width) of the random input images.
        warmup: untimed forward passes run first so cuDNN autotuning and the
            CUDA memory allocator settle before measurement.
        iters: timed forward passes averaged for the result.
        device: torch.device to run on; defaults to CUDA when available,
            otherwise CPU.

    Returns:
        float: average wall-clock seconds per forward pass.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = device.type == 'cuda'
    times = []
    m.eval()
    with torch.no_grad():
        # Warm-up: the first CUDA calls include kernel selection and
        # allocator growth and must not be timed.
        y = None
        for _ in range(warmup):
            x = torch.rand(batchsize, 3, *shape, device=device)
            # Call the module, not m.forward(), so hooks still run.
            y = m(x)
        if y is not None:
            print(y.size())
        for _ in range(iters):
            x = torch.rand(batchsize, 3, *shape, device=device)
            if use_cuda:
                # CUDA launches are asynchronous: synchronize so the timer
                # brackets only this forward pass, not queued work, and
                # guard the call so the function also runs on CPU-only hosts.
                torch.cuda.synchronize()
            t0 = time.perf_counter()  # monotonic, higher resolution than time.time()
            m(x)
            if use_cuda:
                torch.cuda.synchronize()
            times.append(time.perf_counter() - t0)
    dt = sum(times) / len(times)
    print(f"Avg time: {dt:.4f} s ({1 / dt:.1f} fps)")
    return dt


if __name__ == "__main__":
    device = torch.device('cuda')
    # NOTE(review): benchmark=False + deterministic=True forces cuDNN's
    # default deterministic kernels. For throughput measurements with a
    # fixed input shape, benchmark=True usually gives more representative
    # numbers — kept as the author wrote it.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    parser = argparse.ArgumentParser()
    parser.add_argument('--backbone', help='the backbone model', type=str, default="Mobile_v2")
    args = parser.parse_args()

    phase = 'train'
    shape_r, shape_c = 224, 224
    backbone_name = args.backbone
    model_name = f'{backbone_name}_pre'
    print(model_name)

    if backbone_name == 'VGG16':
        backbone = models.vgg16(pretrained=True).to(device)
    if backbone_name == 'Mobile_v2':
        backbone = torch.hub.load('pytorch/vision:v0.5.0', 'mobilenet_v2', pretrained=True).to(device)

    print(backbone_name)
    # Why MobileNetV2 can look *slower* than VGG16 at batch size 1: with a
    # tiny batch the GPU is latency-bound, and MobileNetV2 launches many
    # more (small) kernels than VGG16, so per-kernel launch overhead
    # dominates. At larger batches the GPU becomes compute-bound and
    # MobileNetV2's lower FLOP count wins — consistent with the batch-12
    # numbers above.
    time_cal2(backbone, batchsize=1, shape=(shape_r, shape_c), device=device)