```
from efficientnet_pytorch import EfficientNet
class modelController:
    """Holds a pretrained EfficientNet-B0 binary classifier (GPU, eval mode,
    fp16) plus the torchvision preprocessing pipeline for its inputs.

    NOTE(review): this snippet does not show the imports for `torch` or
    `transforms` (torchvision) — presumably they are imported elsewhere in
    the real file; confirm.
    """

    def __init__(self):
        # Load the model once at construction; get_model() moves it to the
        # GPU, switches to eval mode, and casts it to fp16.
        self.model = self.get_model()
        # Preprocessing for raw images. Resize((224, 112)) is
        # (height, width) in torchvision, matching the (N, 3, 224, 112)
        # tensors used by the benchmark script.
        self.data_transforms = transforms.Compose([
            transforms.Resize((224,112)),
            transforms.ToTensor(),
            # ImageNet channel means / stds — standard for pretrained
            # EfficientNet weights.
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def get_model(self):
        """Return an EfficientNet-B0 with a 2-class head: on GPU, in eval
        mode, and converted to half precision (fp16)."""
        model = EfficientNet.from_pretrained('efficientnet-b0', num_classes = 2)
        model.cuda()   # assumes a CUDA device is available — no fallback here
        model.eval()   # disable dropout / use running batch-norm stats
        model.half()   # fp16 weights; inputs must also be .half()
        return model

    def get_model_inference(self, img):
        """Run a forward pass on `img` (expected: fp16 CUDA tensor of shape
        (N, 3, 224, 112)) and post-process the raw logits.

        NOTE(review): `self.get_inference` is not defined in this snippet —
        presumably it maps logits to a label/probability; confirm it exists
        elsewhere in the class.
        """
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            result = self.model(img)
            result = self.get_inference(result)
            return result
```

```
import cv2
import numpy as np
from classifier import modelController
import torch
import time
import copy
mc = modelController()

# Dummy batch matching the model's preprocessing output:
# (N, C, H, W) = (32, 3, 224, 112), on GPU, in fp16 to match model.half().
tensor = torch.rand((32, 3, 224, 112))
tensor = tensor.cuda()
tensor = tensor.half()
print(tensor.size())

N_ITERS = 50
N_WARMUP = 10

# Warm-up: the first CUDA calls pay one-off costs (context creation, cuDNN
# autotuning, kernel selection) that would otherwise skew the average.
for _ in range(N_WARMUP):
    mc.get_model_inference(tensor)

# CUDA kernel launches are ASYNCHRONOUS: time.time() taken right after the
# forward call only measures launch overhead, not GPU execution. That is
# why fp16 and fp32 appeared to take the same time — neither run was
# actually being measured. Synchronize before and inside the timed loop.
torch.cuda.synchronize()

total_time = 0.0
for i in range(N_ITERS):
    t1 = time.time()
    # was: get_person_cat(tensor) — an undefined name; call the controller.
    mc.get_model_inference(tensor)
    torch.cuda.synchronize()  # wait for the GPU to finish before reading the clock
    t2 = time.time() - t1
    print("time taken: ", t2)
    total_time += t2
print("avg total time: ", total_time / N_ITERS)
```

I have an EfficientNet-B0 model. I am getting a 0.0173 ms average inference time with model.half() and tensor.half(), but when I comment out model.half() and tensor.half(), I get a 0.0172 ms average — essentially the same. Why is the fp16 model not taking less time to infer?