How to get quantized and float tensors on the same device?

I am using a Kaggle GPU to train ResNet-18. After training the model, which I imported via from torchvision.models.quantization import resnet18, I perform static quantization on it as shown below.

import torch
from torchvision.models.quantization import resnet18 as Q_resnet18

# load the trained weights into the quantizable ResNet-18
model = Q_resnet18()
model.load_state_dict(torch.load('./my_model2.pth'))
print_model_size(model)

# choose the quantization backend
backend = "qnnpack"
model.qconfig = torch.quantization.get_default_qconfig(backend)
torch.backends.quantized.engine = backend

# insert observers, then convert to a statically quantized model
model_static_quantized = torch.quantization.prepare(model, inplace=False)
model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False)
print_model_size(model_static_quantized)

Now I want to evaluate my quantized model along with my other models, for which I use the following functions.

import torch
import torch.nn as nn
from tqdm import tqdm


def get_lr(optimizer):
    # return the current learning rate (of the first param group)
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, weight_decay=0, grad_clip=None, opt_func=torch.optim.Adam):
    torch.cuda.empty_cache()
    history = []
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # set up one cycle lr scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, steps_per_epoch=len(train_loader))
    
    for epoch in range(epochs):
        
        # Training phase
        model.train()       
        train_losses = []
        lrs = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            train_losses.append(loss.detach())  # store the value without its graph

            # compute gradients
            loss.backward()

            # apply gradient clipping if requested
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            # perform gradient descent and update the weights
            optimizer.step()

            # reset the gradients
            optimizer.zero_grad()

            # record the current lr, then let the one-cycle scheduler update it
            lrs.append(get_lr(optimizer))
            sched.step()
            
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)

    return history


@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
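
For reference, these loops rely on the model class providing training_step, validation_step, validation_epoch_end, and epoch_end (they are not part of nn.Module). Roughly, my base class looks along these lines (sketched here; the exact metrics may differ):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ImageClassificationBase(nn.Module):
    # sketch of the interface used by fit_one_cycle and evaluate above

    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # forward pass
        return F.cross_entropy(out, labels)  # training loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = (out.argmax(dim=1) == labels).float().mean()
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        return {'val_loss': avg_loss.item(), 'val_acc': avg_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))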

When I call evaluate(model_name, val_dl) on the quantized model, I encounter the following error:

/opt/conda/lib/python3.7/site-packages/torch/nn/quantized/modules/__init__.py in forward(self, X)
     47     def forward(self, X):
     48         return torch.quantize_per_tensor(X, float(self.scale),
---> 49                                          int(self.zero_point), self.dtype)
     50 
     51     @staticmethod

RuntimeError: quantize_tensor_per_tensor_affine expects a quantized and float tensors to be on the same device.

I tried evaluate(model_name.to(device), val_dl) but it didn’t work. (I checked and the device is ‘cuda’)

What do I have to do to solve this error?

@HarshRangwala any news?

UPDATE: The documentation says: "At the moment PyTorch doesn't provide quantized operator implementations on CUDA - this is the direction for future work. Move the model to CPU in order to test the quantized functionality."

That may be the reason why.
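
Since the quantized operators only run on the CPU, moving both the quantized model and the validation batches to the CPU should avoid the device mismatch. A rough sketch, assuming val_dl yields (images, labels) tuples (the cpu_loader helper is just for illustration):

# move the quantized model to CPU - quantized ops are CPU-only here
model_static_quantized = model_static_quantized.to('cpu')

# the batches fed to validation_step must be on CPU as well
def cpu_loader(loader):
    for images, labels in loader:
        yield images.cpu(), labels.cpu()

result = evaluate(model_static_quantized, cpu_loader(val_dl))
print(result)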

If you want to try things out early, we also have a quantized CUDA test here: https://github.com/pytorch/pytorch/blob/master/torch/fx/experimental/fx2trt/example/quantized_resnet_test.py