What does it mean when the model's accuracy is higher after quantization?

I trained a ResNet-18 on my dataset, and here are the model's stats:

Before quantization

Evaluation complete in 2m 19s
Loss: 0.6099 Acc: 0.8468

After quantization

Evaluation complete in 1m 32s
Loss: 0.6310 Acc: 0.8548

Here’s some code:

import os
import time

import torch
import torch.nn as nn
def print_size_of_model(model):
    """ Print the size of the model.
    
    Args:
        model: model whose size needs to be determined

    """
    torch.save(model.state_dict(), 'temp.p')
    print('Size of the model (MB):', os.path.getsize('temp.p') / 1e6)
    os.remove('temp.p')
    
def evaluate(model, device, dataloader, criterion, quantize=False, fbgemm=False):
    """Print model accuracy and loss on the given dataset."""
    model.to(device)
    model.eval()
    
    if quantize:
        modules_to_fuse = [['conv1', 'bn1'],
                           ['layer1.0.conv1', 'layer1.0.bn1'],
                           ['layer1.0.conv2', 'layer1.0.bn2'],
                           ['layer1.1.conv1', 'layer1.1.bn1'],
                           ['layer1.1.conv2', 'layer1.1.bn2'],
                           ['layer2.0.conv1', 'layer2.0.bn1'],
                           ['layer2.0.conv2', 'layer2.0.bn2'],
                           ['layer2.0.downsample.0', 'layer2.0.downsample.1'],
                           ['layer2.1.conv1', 'layer2.1.bn1'],
                           ['layer2.1.conv2', 'layer2.1.bn2'],
                           ['layer3.0.conv1', 'layer3.0.bn1'],
                           ['layer3.0.conv2', 'layer3.0.bn2'],
                           ['layer3.0.downsample.0', 'layer3.0.downsample.1'],
                           ['layer3.1.conv1', 'layer3.1.bn1'],
                           ['layer3.1.conv2', 'layer3.1.bn2'],
                           ['layer4.0.conv1', 'layer4.0.bn1'],
                           ['layer4.0.conv2', 'layer4.0.bn2'],
                           ['layer4.0.downsample.0', 'layer4.0.downsample.1'],
                           ['layer4.1.conv1', 'layer4.1.bn1'],
                           ['layer4.1.conv2', 'layer4.1.bn2']]
        # Fuse each conv + bn pair so it is quantized as a single module
        model = torch.quantization.fuse_modules(model, modules_to_fuse)
        if fbgemm:
            # fbgemm is the quantization backend for x86 server CPUs
            model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
        else:
            model.qconfig = torch.quantization.default_qconfig
        # Insert observers, calibrate on the training set, then convert to int8
        torch.quantization.prepare(model, inplace=True)
        model.eval()
        with torch.no_grad():
            for data, target in train_dl:  # train_dl is a global calibration loader
                model(data)
        torch.quantization.convert(model, inplace=True)
    
    print(model)
    
    running_loss = 0.0
    running_corrects = 0
    with torch.no_grad():
        t0 = time.time()
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

    t1 = time.time()
    time_elapsed = t1 - t0
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = running_corrects.double() / len(dataloader.dataset)
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))


def evaluate_speed(model, dataloader):
    """This function evaluates only the speed of the given model"""
    with torch.no_grad():
        t0 = time.time()
        for inputs, _ in dataloader:
            model(inputs)
    t1 = time.time()
    time_elapsed = t1 - t0
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Inference speed was {:.5f}s per sample at batch size {:d}'.format(
        time_elapsed / float(len(dataloader.dataset)), dataloader.batch_size))

device = 'cpu'
encoder = ResNet(num_classes=258)
loaded_dict_enc = torch.load('QModels_R3/model_checkpoint_epoch_19.pt', map_location=device)
encoder.load_state_dict(loaded_dict_enc)
evaluate(model=encoder, device=device, dataloader=test_dl, criterion=nn.CrossEntropyLoss())
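
The "after quantization" numbers above come from the same helper with the quantization path enabled, along these lines (fbgemm=True selects the fbgemm backend):

evaluate(model=encoder, device=device, dataloader=test_dl,
         criterion=nn.CrossEntropyLoss(), quantize=True, fbgemm=True)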

I want to know: what is the reason behind the model's increased accuracy?
Is it normal?
After looking through some other posts I noticed that my random seed was set to 45, and many suggested setting it to 0 or even 1. Could that be a possible cause of the higher accuracy, or did I do something wrong?

How large is the dataset? If the evaluation set is small, an accuracy change this small could just be noise.
The choice of random seed shouldn't matter here.
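
As a rough sanity check (a minimal sketch; plug in your own numbers), you can treat accuracy measured on n test samples as a binomial estimate and look at its standard error:

import math

def accuracy_standard_error(acc, n):
    """Standard error of an accuracy estimate over n i.i.d. test samples."""
    return math.sqrt(acc * (1.0 - acc) / n)

# e.g. ~85% accuracy measured on a hypothetical 1000-image test set:
print(accuracy_standard_error(0.85, 1000))  # ~0.011, i.e. about +/- 1 point

If the gap between two runs is within a standard error or two, it is indistinguishable from sampling noise.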

Here's the split of my dataset:

train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])
len(train_ds), len(val_ds), len(test_ds)
(10213, 1134, 1260)

and there are 258 classes.

Don't know if this is relevant, but I am using the test dataloader for my final evaluations.

I don't think the difference is really concerning here: the gap is 0.8548 − 0.8468 = 0.0080, i.e. about ten images out of 1260, which is within the roughly ±1 point sampling noise estimated above. Is the performance also similar on the validation set before and after quantization?
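
One way to check (a sketch, assuming the loader built from the val_ds split above is named val_dl, and re-loading the checkpoint before each run so the float copy stays untouched):

# Float baseline on the validation split
encoder.load_state_dict(torch.load('QModels_R3/model_checkpoint_epoch_19.pt', map_location='cpu'))
evaluate(model=encoder, device='cpu', dataloader=val_dl, criterion=nn.CrossEntropyLoss())

# Quantized run on the same split
encoder.load_state_dict(torch.load('QModels_R3/model_checkpoint_epoch_19.pt', map_location='cpu'))
evaluate(model=encoder, device='cpu', dataloader=val_dl, criterion=nn.CrossEntropyLoss(),
         quantize=True, fbgemm=True)

If both splits show the same small shift in the same direction, that would be more interesting; if the shifts bounce around, it's almost certainly just noise.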