How to get quantized and float tensors on the same device?

I am using a Kaggle GPU to train ResNet-18. After training the model, which I imported via from torchvision.models.quantization import resnet18, I perform static quantization on it as shown below.

import torch
from torchvision.models.quantization import resnet18 as Q_resnet18

# load the trained weights into the quantizable ResNet-18
model = Q_resnet18()
model.load_state_dict(torch.load('./my_model2.pth'))
print_model_size(model)

# choose the quantization backend
backend = "qnnpack"
model.qconfig = torch.quantization.get_default_qconfig(backend)
torch.backends.quantized.engine = backend

# insert observers, then convert to a statically quantized model
model_static_quantized = torch.quantization.prepare(model, inplace=False)
model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False)
print_model_size(model_static_quantized)

Now I want to evaluate my quantized model along with my other models, for which I use the following functions.

import torch
import torch.nn as nn
from tqdm import tqdm


def get_lr(optimizer):
    # return the current learning rate (of the first param group)
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, weight_decay=0, grad_clip=None, opt_func=torch.optim.Adam):
    torch.cuda.empty_cache()
    history = []
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # set up one cycle lr scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        
        # Training phase
        model.train()       
        train_losses = []
        lrs = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            train_losses.append(loss.detach())  # store the value without its graph

            # compute gradients
            loss.backward()

            # apply gradient clipping if requested
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            # perform gradient descent and update the weights
            optimizer.step()

            # reset the gradients
            optimizer.zero_grad()

            # record the current lr, then let the one-cycle scheduler update it
            lrs.append(get_lr(optimizer))
            sched.step()
            
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)

    return history


@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
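
For reference, these loops rely on the model class providing training_step, validation_step, validation_epoch_end, and epoch_end (they are not part of nn.Module). Roughly, my base class looks along these lines (sketched here; the exact metrics may differ):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ImageClassificationBase(nn.Module):
    # sketch of the interface used by fit_one_cycle and evaluate above

    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # forward pass
        return F.cross_entropy(out, labels)  # training loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = (out.argmax(dim=1) == labels).float().mean()
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        return {'val_loss': avg_loss.item(), 'val_acc': avg_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))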

When I call evaluate(model_name, val_dl) on the quantized model, I encounter the following error:

/opt/conda/lib/python3.7/site-packages/torch/nn/quantized/modules/__init__.py in forward(self, X)
     47     def forward(self, X):
     48         return torch.quantize_per_tensor(X, float(self.scale),
---> 49                                          int(self.zero_point), self.dtype)
     50 
     51     @staticmethod

RuntimeError: quantize_tensor_per_tensor_affine expects a quantized and float tensors to be on the same device.

I tried evaluate(model_name.to(device), val_dl) but it didn’t work. (I checked and the device is ‘cuda’)

What do I have to do to solve this error?

@HarshRangwala any news?

UPDATE: The documentation says: "At the moment PyTorch doesn't provide quantized operator implementations on CUDA - this is the direction for future work. Move the model to CPU in order to test the quantized functionality."

That may be the reason why.
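
Since the quantized operators only run on the CPU, moving both the quantized model and the validation batches to the CPU should avoid the device mismatch. A rough sketch, assuming val_dl yields (images, labels) tuples (the cpu_loader helper is just for illustration):

# move the quantized model to CPU - quantized ops are CPU-only here
model_static_quantized = model_static_quantized.to('cpu')

# the batches fed to validation_step must be on CPU as well
def cpu_loader(loader):
    for images, labels in loader:
        yield images.cpu(), labels.cpu()

result = evaluate(model_static_quantized, cpu_loader(val_dl))
print(result)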

If you want to try things out early, we also have a quantized CUDA test here: https://github.com/pytorch/pytorch/blob/master/torch/fx/experimental/fx2trt/example/quantized_resnet_test.py