Inference result mismatch between eager mode and torch.compile(mode="max-autotune")
Hi team,
I’m encountering a result mismatch when running inference with torch.compile.
What I’m doing:
- I have a PyTorch model defined with standard nn.Module:
```python
import torch.nn as nn


class BaseConv(nn.Module):
    """Plain convolution wrapper."""
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, conv_layer):
        super().__init__()
        self.conv = conv_layer(in_channels, out_channels, kernel_size=kernel_size,
                               stride=stride, padding=padding, bias=False)

    def forward(self, x):
        return self.conv(x)


class ActivatedConv(BaseConv):
    """Convolution followed by an activation."""
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, conv_layer, activation):
        super().__init__(in_channels, out_channels, kernel_size, stride, padding, conv_layer)
        self.activation = activation

    def forward(self, x):
        return self.activation(self.conv(x))


class NormalizedConv(ActivatedConv):
    """Convolution -> normalization -> activation."""
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, conv_layer, norm, activation):
        super().__init__(in_channels, out_channels, kernel_size, stride, padding, conv_layer, activation)
        self.norm = norm(out_channels)

    def forward(self, x):
        return self.activation(self.norm(self.conv(x)))


class Conv2DBNReLU(NormalizedConv):
    """Conv2d + BatchNorm2d + ReLU block."""
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__(in_channels, out_channels, kernel_size, stride, padding,
                         nn.Conv2d, nn.BatchNorm2d, nn.ReLU())


class MyModel(nn.Module):
    def __init__(self, in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.conv1 = Conv2DBNReLU(in_channels, out_channels, kernel_size, stride, padding)

    def forward(self, x):
        return self.conv1(x)


def my_model_function(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1):
    return MyModel(in_channels, out_channels, kernel_size, stride, padding)


if __name__ == "__main__":
    model = my_model_function()
    print(model)
    # Input used for the comparison below: torch.rand(1, 3, 224, 224, dtype=input_dtype)
```
- I run inference in two different modes (a fuller sketch of the comparison follows this list):
  - Eager mode: output_eager = model(input_tensor)
  - Compiled mode: output_compiled = torch.compile(model, mode="max-autotune")(input_tensor)
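Roughly, the comparison looks like this (a minimal sketch; the float32 dtype, CPU device, and the call to model.eval() are assumptions of this sketch, with the input shape taken from the comment in the model code):

```python
import torch

# Minimal sketch of the eager-vs-compiled comparison (dtype and eval() mode assumed).
model = my_model_function().eval()
input_tensor = torch.rand(1, 3, 224, 224)  # shape from the comment above; float32 assumed

with torch.no_grad():
    output_eager = model(input_tensor)

compiled_model = torch.compile(model, mode="max-autotune")
with torch.no_grad():
    output_compiled = compiled_model(input_tensor)

print(torch.allclose(output_eager, output_compiled, atol=1e-5))
```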
Problem:
The two outputs are not equal, and the difference exceeds typical tolerances such as torch.allclose(output_eager, output_compiled, atol=1e-5). This happens consistently on my model when compiled with torch.compile and mode="max-autotune" (using the default backend, "inductor").
=== Detailed comparison ===
Total number of elements: 3,211,264
Max absolute error: 0.00128412
Mean absolute error: 0.000100889
Max relative error: 23,868.7
Mean relative error: 0.285904
Number of elements exceeding tolerance: 98,102
Percentage of out-of-tolerance elements: 3.05%
Result of torch.allclose(output_eager, output_compiled, atol=1e-5): False
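For reference, the statistics above are computed along these lines (a sketch; the epsilon guard in the relative error and the 1e-5 per-element threshold are assumptions of this sketch):

```python
# Sketch of the error metrics reported above.
abs_err = (output_eager - output_compiled).abs()
rel_err = abs_err / output_eager.abs().clamp_min(1e-12)  # eps guard assumed

num_elements = output_eager.numel()
num_bad = (abs_err > 1e-5).sum().item()  # per-element tolerance assumed to be 1e-5

print(f"Total number of elements: {num_elements:,}")
print(f"Max absolute error: {abs_err.max().item():.6g}")
print(f"Mean absolute error: {abs_err.mean().item():.6g}")
print(f"Max relative error: {rel_err.max().item():,.1f}")
print(f"Mean relative error: {rel_err.mean().item():.6g}")
print(f"Number of elements exceeding tolerance: {num_bad:,}")
print(f"Percentage of out-of-tolerance elements: {100.0 * num_bad / num_elements:.2f}%")
```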
My Question:
- Is this difference expected due to aggressive optimizations under max-autotune?
- Or is this possibly a bug or an unsupported pattern in my model?
I’d appreciate any clarification or suggestions on how to debug this further.
Thanks!