Hello,
I’m performing a batch of matrix multiplications using the torch.matmul
function (or torch.mm).
However, I found that the output of matmul
is not equal to the batched mm results
, especially when the dimensions of the matrices are large.
For reference, here is what I used:
import numpy as np
import torch
def diff(x, y):
    """Compare x @ y computed three ways and return the pairwise differences.

    Builds a batch of size 2 by broadcasting x and y, then returns:
      - torch result minus the first slice of the torch batched result,
      - numpy result minus the first slice of the numpy batched result,
      - torch result minus the numpy result (a tensor, via implicit conversion).
    """
    batched_x = x.unsqueeze(0).expand(2, *x.size())
    batched_y = y.unsqueeze(0).expand(2, *y.size())
    # Single (non-batched) products, computed once and reused.
    torch_single = x @ y
    numpy_single = x.numpy() @ y.numpy()
    # First slice of each batched product; all batch slices see identical data.
    torch_batched = (batched_x @ batched_y)[0]
    numpy_batched = (batched_x.numpy() @ batched_y.numpy())[0]
    torch_diff = torch_single - torch_batched
    numpy_diff = numpy_single - numpy_batched
    torch_numpy_diff = torch_single - numpy_single
    return torch_diff, numpy_diff, torch_numpy_diff
def check(x):
    """Print summary stats of x and the absolute matmul discrepancies for x @ x.T."""
    print("x.mean():", x.mean(), "x.max():", x.max())
    t_diff, n_diff, tn_diff = diff(x, x.t())
    # Report mean/max of the absolute differences for each comparison.
    abs_t = t_diff.abs()
    abs_n = np.abs(n_diff)
    abs_tn = tn_diff.abs()
    print("torch_diff.mean():", abs_t.mean(), "torch_diff.max():", abs_t.max())
    print("numpy_diff.mean()", abs_n.mean(), "numpy_diff.max():", abs_n.max())
    print("torch_numpy_diff.mean()", abs_tn.mean(), "torch_numpy_diff.max():", abs_tn.max())
    print()
if __name__ == '__main__':
    torch.manual_seed(0)
    # (shape, optional scale) cases: growing inner dimension, then growing magnitude.
    cases = [
        ([81, 100], None),
        ([81, 10000], None),
        ([81, 1000000], None),
        ([81, 10000], 100),
        ([81, 10000], 10000),
    ]
    for shape, scale in cases:
        sample = torch.randn(shape)
        check(sample if scale is None else sample * scale)
    # Environment report, printed after the numeric comparisons.
    env_lines = [
        f"- PyTorch version: {torch.__version__}",
        f"- CUDA version: {torch.version.cuda}",
        f"- cuDNN version: {torch.backends.cudnn.version()}",
    ]
    print("\n".join(env_lines) + "\n")
and I got the following output:
x.mean(): tensor(-0.0085) x.max(): tensor(4.1015)
torch_diff.mean(): tensor(4.8151e-08) torch_diff.max(): tensor(7.6294e-06)
numpy_diff.mean() 0.0 numpy_diff.max(): 0.0
torch_numpy_diff.mean() tensor(8.4816e-08) torch_numpy_diff.max(): tensor(9.5367e-06)
x.mean(): tensor(0.0004) x.max(): tensor(4.6582)
torch_diff.mean(): tensor(4.0393e-05) torch_diff.max(): tensor(0.0029)
numpy_diff.mean() 0.0 numpy_diff.max(): 0.0
torch_numpy_diff.mean() tensor(2.7329e-05) torch_numpy_diff.max(): tensor(0.0020)
x.mean(): tensor(2.8049e-05) x.max(): tensor(5.5180)
torch_diff.mean(): tensor(0.0091) torch_diff.max(): tensor(2.5625)
numpy_diff.mean() 0.0 numpy_diff.max(): 0.0
torch_numpy_diff.mean() tensor(0.0014) torch_numpy_diff.max(): tensor(0.4375)
x.mean(): tensor(-0.0214) x.max(): tensor(503.4597)
torch_diff.mean(): tensor(0.4008) torch_diff.max(): tensor(40.)
numpy_diff.mean() 0.0 numpy_diff.max(): 0.0
torch_numpy_diff.mean() tensor(0.2926) torch_numpy_diff.max(): tensor(24.)
x.mean(): tensor(2.8537) x.max(): tensor(44475.2266)
torch_diff.mean(): tensor(3845.2627) torch_diff.max(): tensor(327680.)
numpy_diff.mean() 0.0 numpy_diff.max(): 0.0
torch_numpy_diff.mean() tensor(2937.2839) torch_numpy_diff.max(): tensor(262144.)
- PyTorch version: 1.7.0
- CUDA version: 10.1
- cuDNN version: 7603