Sure, here’s a more expansive example.
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """LeNet-style CNN for 32x32 RGB inputs (e.g. CIFAR-10) with 10 output classes.

    Two conv+pool stages reduce a 3x32x32 image to 16 feature maps of 5x5,
    which are flattened and passed through three fully connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        out = self.pool(F.relu(self.conv1(x)))
        out = self.pool(F.relu(self.conv2(out)))
        # Flatten to (batch, 400) for the fully connected head.
        out = out.view(-1, 16 * 5 * 5)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)
I trained this simple CIFAR model for a single epoch. From the .pth checkpoint and the model definition, I exported an ONNX file and then built the TensorRT engine from it. I would attach the .trt file, but it is not an authorized extension.
from abc import ABC, abstractmethod
from typing import Optional

import torch
import numpy as np
import tensorrt as trt
import pycuda.driver as cudadriver
import pycuda.autoinit as cudacontext
class TensorRTEngine(ABC):
    """Base wrapper around a serialized TensorRT engine.

    Loads and deserializes a ``.trt`` engine file, owns an execution context
    and a CUDA stream, and provides numpy/torch pre- and post-processing.
    Subclasses must implement :meth:`allocate_memory_buffers` to provide the
    input/output device buffers and the bindings list.

    Args:
        tensorrt_file: Path to the serialized TensorRT engine.
        output_shape: Shape the raw output tensor is reshaped to.
        multiple_returns: If True, ``post_process`` splits the reshaped
            output using ``return_slices``.
        return_slices: Slices/indices applied to the reshaped output when
            ``multiple_returns`` is set. Defaults to an empty list.
    """

    def __init__(
        self,
        tensorrt_file: str,
        output_shape: tuple,
        multiple_returns: bool = False,
        return_slices: Optional[list] = None,
    ) -> None:
        self.engine = self.load_tensorrt_engine(tensorrt_file)
        self.output_shape = output_shape
        self.multiple_returns = multiple_returns
        # Fresh list per instance: a mutable default argument ([]) would be
        # shared by every instance of the class.
        self.return_slices = return_slices if return_slices is not None else []
        self.execution_context = self.engine.create_execution_context()
        self.execution_stream = cudadriver.Stream()

    def load_tensorrt_engine(self, path):
        """Deserialize and return a TensorRT engine from ``path``."""
        trt_logger = trt.Logger()
        with open(path, "rb") as f, trt.Runtime(trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def pre_process(self, image):
        """Return ``image`` as a float32 numpy array.

        Accepts a ``torch.Tensor`` (moved to host memory first) or anything
        with an ``astype`` method (e.g. an ``np.ndarray``).
        """
        if isinstance(image, torch.Tensor):
            image = image.cpu().numpy()
        return image.astype(np.float32)

    def post_process(self, output_tensors):
        """Reshape the raw output and optionally split it into slices."""
        reshaped = output_tensors.view(self.output_shape)
        if self.multiple_returns:
            return [reshaped[s] for s in self.return_slices]
        return reshaped

    @abstractmethod
    def allocate_memory_buffers(self):
        """Allocate input/output device buffers; return (in, out, bindings)."""
class UnifiedTensorRTEngine(TensorRTEngine):
    """TensorRT engine whose I/O buffers live in CUDA unified (managed) memory.

    The host writes the input image directly into the managed input buffer;
    after stream synchronization the managed output buffer is read back into
    torch tensors for post-processing.
    """

    def __init__(
        self,
        tensorrt_file: str,
        output_shape: tuple,
        multiple_returns: bool = False,
        return_slices: Optional[list] = None,
    ):
        super().__init__(tensorrt_file, output_shape, multiple_returns, return_slices)
        self.in_memory, self.out_memory, self.bindings = self.allocate_memory_buffers()

    def __call__(self, image):
        """Run inference on ``image`` and return the post-processed output.

        Raises:
            RuntimeError: if TensorRT reports that enqueueing failed —
                previously this was ignored and an all-zero output buffer
                was silently returned.
        """
        image = self.pre_process(image)
        # Ellipsis assignment works for any buffer rank (the original
        # ``[:, :, :]`` hard-coded a 3-D input binding).
        self.in_memory[...] = image
        # NOTE(review): if torch has initialized its own CUDA context (e.g.
        # via tensor.cuda()), it can differ from the context created by
        # pycuda.autoinit; executing from the wrong current context produces
        # "invalid resource handle". Using pycuda.autoprimaryctx so both
        # libraries share the primary context is the usual fix — confirm.
        ok = self.execution_context.execute_async_v2(
            bindings=self.bindings, stream_handle=self.execution_stream.handle
        )
        if not ok:
            raise RuntimeError("TensorRT async execution failed to enqueue")
        self.execution_stream.synchronize()
        stacked = torch.stack([torch.Tensor(out) for out in self.out_memory])
        return self.post_process(stacked)

    def allocate_memory_buffers(self):
        """Allocate managed input/output buffers and device binding pointers.

        Assumes binding 0 is the engine input and binding 1 its output —
        TODO confirm against the exported ONNX/TensorRT network.
        """
        dtypes = [
            trt.nptype(self.engine.get_binding_dtype(binding))
            for binding in self.engine
        ]
        in_shape = tuple(self.engine.get_binding_shape(0))
        out_shape = tuple(self.engine.get_binding_shape(1))
        in_memory = cudadriver.managed_empty(
            shape=in_shape,
            dtype=dtypes[0],
            mem_flags=cudadriver.mem_attach_flags.GLOBAL,
        )
        out_memory = cudadriver.managed_empty(
            shape=out_shape,
            dtype=dtypes[1],
            mem_flags=cudadriver.mem_attach_flags.GLOBAL,
        )
        # Make sure allocation is visible before taking device pointers.
        self.execution_stream.synchronize()
        bindings = [
            int(in_memory.base.get_device_pointer()),
            int(out_memory.base.get_device_pointer()),
        ]
        return in_memory, out_memory, bindings
if __name__ == "__main__":
    engine = UnifiedTensorRTEngine(tensorrt_file="dummy.trt", output_shape=(1, -1))
    # Random 8-bit image; pre_process converts it to float32 before inference.
    # (The original assigned to ``full_image_tensor`` but then used
    # ``image_tensor`` — a NameError; the name is now consistent.)
    image_tensor = torch.randint(0, 255, size=(3, 32, 32))
    use_gpu = True
    if use_gpu:
        # Moving the tensor to the GPU initializes torch's CUDA context,
        # which is where the "invalid resource handle" failure appears.
        image_tensor = image_tensor.cuda()  # FAILS
    # With use_gpu = False (host tensor) inference PASSES.
    out = engine(image_tensor)
    print(out)
Here’s the error that occurs:
[TensorRT] ERROR: ../rtExt/cuda/cudaFusedConvActRunner.cpp (313) - Cuda Error in executeFused: 400 (invalid resource handle)
[TensorRT] ERROR: FAILED_EXECUTION: std::exception
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
After inference there are further post-processing steps that use PyTorch, so being able to use TensorRT and PyTorch in the same process is important.