Hi All,
I encountered a large numerical discrepancy between functional.conv2d() and functional.unfold() + torch.tensordot(), which is causing me trouble. My PyTorch version is 1.5.0. Here is a minimal example to reproduce it:
import math
import numpy as np
import torch
import torch.optim as optim
import torch.backends.cudnn
import torch.nn as nn
import torch.nn.functional as F
import argparse
def main(argv=None):
    """Compare F.conv2d against the equivalent unfold + tensordot computation.

    Builds a seeded random input and kernel, computes the same convolution two
    ways (built-in conv2d vs. im2col + explicit contraction), and prints the
    magnitude range of each result, their absolute difference, and the
    element-wise relative difference.

    Args:
        argv: Optional list of command-line arguments. Defaults to None, in
            which case argparse reads sys.argv — so existing script usage is
            unchanged, while programmatic callers can pass e.g. ['--device', 'cpu'].
    """
    ## parse arguments
    parser = argparse.ArgumentParser(description='conv2d & tensordot discrepancy test.')
    parser.add_argument('--device', default="cuda", type=str, choices=['cuda', 'cpu'], help='cuda or cpu')
    args = parser.parse_args(argv)

    # Fall back to CPU with a notice instead of crashing on GPU-less machines.
    device = args.device
    if device == "cuda" and not torch.cuda.is_available():
        print("CUDA requested but not available; falling back to CPU.")
        device = "cpu"

    ## improve reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = False  # setting this to True doubles the error
    torch.backends.cudnn.benchmark = False

    x = torch.randn(128, 64, 32, 32).to(device)  # input: N,C,H,W
    w = torch.randn(64, 64, 3, 3).to(device)     # kernel: Cout,Cin,K,K

    # Route 1: im2col. unfold yields (N, Cin*K*K, H*W); reshape restores the
    # spatial grid so each output position carries its flattened 3x3 patch.
    a = F.unfold(x, kernel_size=3, dilation=1, padding=1, stride=1).reshape(-1, 64 * 3 ** 2, 32, 32)
    # Contract the flattened-kernel axis: (Cout, Cin*K*K) . (Cin*K*K, N, H, W)
    # -> (Cout, N, H, W), then swap to (N, Cout, H, W).
    b = torch.tensordot(w.reshape(64, -1), a.transpose(0, 1), dims=1).transpose(0, 1)
    # Route 2: built-in convolution with identical hyper-parameters.
    c = F.conv2d(x, weight=w, bias=None, stride=1, padding=1, dilation=1, groups=1)

    diff = c - b
    # Element-wise relative difference; NOTE it is inflated (and 0/0 -> NaN)
    # wherever both outputs are near zero, so ratiomax overstates the error.
    ratio = diff / torch.max(b.abs(), c.abs())

    print("xmax={},xmin={}".format(x.abs().max(), x.abs().min()))
    print("wmax={},wmin={}".format(w.abs().max(), w.abs().min()))
    print("bmax={},bmin={}".format(b.abs().max(), b.abs().min()))
    print("cmax={},cmin={}".format(c.abs().max(), c.abs().min()))
    print("diffmax={},diffmin={}".format(diff.abs().max(), diff.abs().min()))
    print("ratiomax={},ratiomin={}".format(ratio.abs().max(), ratio.abs().min()))
# Entry point: run the comparison only when executed as a script.
if __name__ == '__main__':
    main()
The output for device="cuda":
xmax=5.237013339996338,xmin=2.282601769820758e-07
wmax=4.395885944366455,wmin=4.590053504216485e-05
bmax=138.94764709472656,bmin=4.302736215322511e-06
cmax=138.9477081298828,cmin=1.5795230865478516e-06
diffmax=0.0010390281677246094,diffmin=0.0
ratiomax=1.754783272743225,ratiomin=0.0
The output for device="cpu":
xmax=5.237013339996338,xmin=2.282601769820758e-07
wmax=4.395885944366455,wmin=4.590053504216485e-05
bmax=138.94773864746094,bmin=1.9371509552001953e-06
cmax=138.94757080078125,cmin=3.674944991871598e-06
diffmax=0.0001678466796875,diffmin=0.0
ratiomax=0.6250920295715332,ratiomin=0.0
The magnitude of the difference seems large to me — what is causing this?
In my application the unfold()+tensordot() approach gives much better results, yet it is much slower than conv2d().
Thanks !