Modify the amount of calibration data, the model accuracy after int8 quantization is actually higher than the original model accuracy
def get_imagenet(dataset_dir=’…/dataset/CIFAR10’, batch_size=32):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
train_transform = transforms.Compose([
# transforms.Resize(256),
transforms.RandomResizedCrop(32),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
])
test_transform = transforms.Compose([
transforms.ToTensor(),
normalize,
])
train_dataset = datasets.CIFAR10(root=dataset_dir, train=True, transform=train_transform, download=True)
test_dataset = datasets.CIFAR10(root=dataset_dir, train=False, transform=test_transform, download=True)
trainloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=NUM_WORKERS,
pin_memory=True, shuffle=False)
testloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=NUM_WORKERS,
pin_memory=True, shuffle=False)
return trainloader, testloader
class quantizeModel(object):
def __init__(self):
super(quantizeModel, self).__init__()
self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
self.train_loader, self.test_loader = get_imagenet()
self.quant()
def quant(self):
model = self.load_model()
model.eval()
self.print_size_of_model(model)
self.validate(model, "original_resnet18", self.test_loader)
model.fuse_model()
self.print_size_of_model(model)
self.quantize(model)
def load_model(self):
model = resnet18()
state_dict = torch.load("CIFAR10_resnet18.pth", map_location=self.device)
model.load_state_dict(state_dict)
model.to(self.device)
return model
def print_size_of_model(self, model):
torch.save(model.state_dict(), "temp.p")
print('Size (MB):', os.path.getsize("temp.p") / 1e6)
os.remove('temp.p')
def validate(self, model, name, data_loader):
with torch.no_grad():
correct = 0
total = 0
acc = 0
for data in data_loader:
images, labels = data
images, labels = images.to(self.device), labels.to(self.device)
output = model(images)
_, predicted = torch.max(output, dim=1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
if total == 1024: #calibration data
break
acc = round(100 * correct / total, 3)
print('{{"metric": "{}_val_accuracy", "value": {}%}}'.format(name, acc))
return acc
def quantize(self, model):
#model.qconfig = torch.quantization.default_qconfig
#model.qconfig = torch.quantization.default_per_channel_qconfig
model.qconfig = torch.quantization.QConfig(
activation=torch.quantization.observer.MinMaxObserver.with_args(reduce_range=True),
weight=torch.quantization.observer.PerChannelMinMaxObserver.with_args(dtype=torch.qint8,
qscheme=torch.per_channel_affine))
pmodel = torch.quantization.prepare(model)
#calibration
self.validate(pmodel, "quntize_per_channel_resent18_train", self.train_loader)
qmodel = torch.quantization.convert(pmodel)
self.validate(qmodel, "quntize_per_chaannel_resent18_test", self.test_loader)
self.print_size_of_model(qmodel)
torch.jit.save(torch.jit.script(qmodel), "quantization_per_channel_model18.pth")
Original model accuracy:71.76%
First quantification:batch_size:32 calibration data: 2048
Quantified model accuracy:71.51%
Second quantification:batch_size:32 calibration data: 1024
Quantified model accuracy:71.85%
Why the accuracy becomes higher after quantization?
In addition, I think that the total number of calibration data remains unchanged, the maximum and minimum range of activation should be fixed, and the quantization accuracy should also be fixed. However, it is found that the total number of calibration data remains unchanged, and if batch_size is modified, the accuracy after quantization will change. What is the reason?