I'm currently following the guide on quantization from here:
https://pytorch.org/tutorials/prototype/fx_graph_mode_ptq_static.html
I tried using "model.to(self.device)" in the calibrate function, but it doesn't seem to be working. Does anyone know what I did wrong?
import copy
import torch
from torch.ao.quantization import get_default_qconfig, QConfigMapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

self.device = "cuda" if torch.cuda.is_available() else "cpu"
print(self.device)
self.model = Net().to(self.device)

model_to_quantize = copy.deepcopy(self.model)
model_to_quantize.eval()

qconfig = get_default_qconfig("x86")
qconfig_mapping = QConfigMapping().set_global(qconfig)

# run a few batches through the prepared model so the observers can collect stats
def calibrate(model, data_loader):
    model.to(self.device)
    model.eval()
    with torch.no_grad():
        for image, target in data_loader:
            model(image)

example_inputs = (next(iter(data_loader))[0])
prepared_model = prepare_fx(model_to_quantize, qconfig_mapping, example_inputs)
calibrate(prepared_model, data_loader_test)
self.model = convert_fx(prepared_model)
But when I try to run it, I get:
43 with torch.no_grad():
44 for image, target in data_loader:
---> 45 model(image)
...
1163 )
1164 else:
1165 new_min, new_max = torch.aminmax(x)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument self in method wrapper_CUDA_out_histc_out)
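My guess from the error is that the prepared model (and its observers) ends up on cuda:0 after model.to(self.device), while the batches coming out of data_loader_test stay on the CPU, so the observer's histc call sees tensors on two different devices. A minimal sketch of what I'd try next, assuming the observers can run on CUDA at all, is moving each batch onto self.device inside the calibration loop:

def calibrate(model, data_loader):
    model.to(self.device)
    model.eval()
    with torch.no_grad():
        for image, target in data_loader:
            # assumption: the batch has to live on the same device as the
            # model and its observers to avoid the device mismatch
            model(image.to(self.device))

The other option I can think of is to leave the prepared model on the CPU for calibration (as far as I can tell the tutorial keeps everything on the CPU), i.e. drop the model.to(self.device) call entirely. Is one of those the intended way to do this?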