Getting Runtime Error when testing quantized model

I am getting a RuntimeError when I try to evaluate my quantized model. How do I go about fixing this? I have no clue where to even begin.

     81 for X, y in test_dataloader:
     82     X, y = X.to(self.device), y.to(self.device)
---> 83     pred = self.model(X)
     84     test_loss += self.loss_fn(pred, y).item()
     85     correct += (pred.argmax(1) == y).type(torch.float).sum().item()

RuntimeError: getCudnnDataTypeFromScalarType() not supported for QUInt8

This is how I quantize the model (post-training static quantization with FX graph mode):

    import copy
    from torch.ao.quantization import get_default_qconfig_mapping
    from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

    def calibrate(model, data_loader, device):
        # Run batches through the prepared model so the observers
        # can record activation statistics.
        model.eval()
        with torch.no_grad():
            for image, target in data_loader:
                image = image.to(device)
                model(image)

    model_fp32 = copy.deepcopy(self.model)
    qconfig_mapping = get_default_qconfig_mapping("qnnpack")
    model_fp32.eval()
    example_inputs = (next(iter(data_loader))[0],)  # prepare_fx expects a tuple of inputs
    prepared_model = prepare_fx(model_fp32, qconfig_mapping, example_inputs)
    calibrate(prepared_model, data_loader_test, self.device)
    self.model = convert_fx(prepared_model)

And this is the test method that raises the error:

    def test(self, test_dataloader: DataLoader) -> list:
        size = len(test_dataloader.dataset)
        num_batches = len(test_dataloader)
        self.model.eval()
        test_loss, correct = 0, 0
        with torch.no_grad():
            for X, y in test_dataloader:
                X, y = X.to(self.device), y.to(self.device)
                pred = self.model(X)
                test_loss += self.loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
        return [correct*100, test_loss]
The model definition:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 64, 3, 1)
            self.dropout1 = nn.Dropout(0.25)
            self.dropout2 = nn.Dropout(0.5)
            self.fc1 = nn.Linear(9216, 128)
            self.fc2 = nn.Linear(128, 10)

        def forward(self, x):
            x = self.conv1(x)
            x = F.relu(x)
            x = self.conv2(x)
            x = F.relu(x)
            x = F.max_pool2d(x, 2)
            x = self.dropout1(x)
            x = torch.flatten(x, 1)
            x = self.fc1(x)
            x = F.relu(x)
            x = self.dropout2(x)
            x = self.fc2(x)
            output = F.log_softmax(x, dim=1)
            return output

int8 quantized inference is not yet natively supported on GPUs in PyTorch: after convert_fx the model operates on quantized (QUInt8) tensors, and cuDNN has no kernels for that dtype, which is exactly what the error is telling you. Move the quantized model, and the inputs in test, to the CPU to get around this.
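
A minimal sketch of the workaround, assuming the trainer object from your snippets (self.model, self.device, self.test, prepared_model, and test_dataloader are taken from your code; the rest is illustrative). The tail of your quantization code would become something like:

    import torch
    from torch.ao.quantization.quantize_fx import convert_fx

    # Match the quantized engine to the "qnnpack" qconfig used above.
    torch.backends.quantized.engine = "qnnpack"

    self.model = convert_fx(prepared_model)
    self.model.to("cpu")               # quantized ops only have CPU kernels
    self.device = torch.device("cpu")  # so test() moves X and y to the CPU as well
    accuracy, avg_loss = self.test(test_dataloader)

One more thing to double-check: "qnnpack" primarily targets ARM CPUs. If you are calibrating and testing on an x86 machine, get_default_qconfig_mapping("x86") (or "fbgemm" on older releases) together with the matching torch.backends.quantized.engine setting is usually the better fit.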