I am trying to do a sanity check of my implementation of 2D convolution in PyTorch.
I am doing the following :
B, C, H, W = 64, 3, 32, 32
x = torch.arange(B*C*H*W).view(B, C, H, W)
# construct an image of shape (64,3,32,32)
x = torch.tensor(x,dtype=torch.float32)
C_out, K = 8, 5
stride = 1
# Get conv2d layer from pytorch
conv = nn.Conv2d(C, C_out, K, bias=False)
conv.eval()
# torch library's output
torch_conv_out = conv(x)
# extract this weight to check
torch_w = conv.weight.data
# My loop implementation -- Removed boilerplate for sharing
out_h, out_w = (H-K)//stride +1, (W-K)//stride +1 # 28,28
out_image = torch.empty(B, C_out, out_h, out_w)
for f in range(C_out):
for i in range(out_h):
for j in range(out_w):
patch = x[:,:,i:i+K,j:j+K]
out_image[:,f,i,j] = torch.sum(torch.mul(patch,torch_w[f,:,:,:]), dim=(-1,-2,-3))
## My vectorized implementation -- Removed boilerplate for sharing
patches = x.unfold(2, K, stride).unfold(3, K, stride)
patches = patches.transpose(1,3)
# get weights to shape [8, 1, 1, 1, 3, 5, 5] by adding additional dimensions
torch_w_reshaped = torch_w.unsqueeze(1).unsqueeze(1).unsqueeze(1)
# element wise multiplication
# [64, 28, 28, 3, 5, 5] * [8, 1, 1, 1, 3, 5, 5]
#--> [8, 64, 28, 28, **3, 5, 5**]
# --> sum --> [8, 64, 28, 28]
# --> transpose --> [64, 8, 28, 28]
vect_output = torch.mul(patches, torch_w_reshaped).sum(dim=(-1,-2,-3)).transpose(0,1)
When I check, these 3 results are different from each other.
torch.sum((out_image - torch_conv_out)**2)**0.5 #5.5601
torch.sum((vect_output - out_image)**2)**0.5 #83723.3828
torch.sum((vect_output - torch_conv_out)**2)**0.5 #83723.3906
Why are these results different from each other? Could you kindly help me understand what I am doing wrong?