Pytorch Convolution 2d result sanity check

I am trying to do a sanity check of my implementation of 2D convolution in PyTorch.

I am doing the following:

# Build a deterministic test image of shape (B, C, H, W) = (64, 3, 32, 32).
B, C, H, W = 64, 3, 32, 32
x = torch.arange(B*C*H*W).view(B, C, H, W)
# arange yields int64; Conv2d needs float32. Calling torch.tensor(x, ...) on an
# existing tensor is the deprecated copy-construct pattern (emits a UserWarning);
# .to(dtype) is the supported conversion.
x = x.to(torch.float32)

C_out, K = 8, 5
stride = 1

# Get conv2d layer from pytorch (bias disabled so the manual check only
# has to reproduce the weight term)
conv = nn.Conv2d(C, C_out, K, bias=False)
conv.eval()

# torch library's output
torch_conv_out = conv(x)

# extract this weight to check
torch_w = conv.weight.data

# My loop implementation -- Removed boilerplate for sharing
out_h, out_w = (H-K)//stride + 1, (W-K)//stride + 1  # 28, 28
out_image = torch.empty(B, C_out, out_h, out_w)
for f in range(C_out):
  for i in range(out_h):
    for j in range(out_w):
      # The window for output position (i, j) starts at (i*stride, j*stride).
      # The original sliced i:i+K / j:j+K, which is only correct when
      # stride == 1; this form is identical for stride == 1 and also
      # handles larger strides.
      patch = x[:,:,i*stride:i*stride+K,j*stride:j*stride+K]  # [B, C, K, K]
      # Weighted sum over (C, K, K) for filter f, all batch entries at once.
      out_image[:,f,i,j] = torch.sum(torch.mul(patch,torch_w[f,:,:,:]), dim=(-1,-2,-3))


## My vectorized implementation  -- Removed boilerplate for sharing
# Unfold height then width: patches is [B, C, out_h, out_w, K, K] = [64, 3, 28, 28, 5, 5]
patches = x.unfold(2, K, stride).unfold(3, K, stride)
# NOTE(review): BUG -- transpose(1,3) swaps the channel dim (1) with out_w (3),
# giving [B, out_w, out_h, C, K, K]. The shape matches the comment below only
# because out_h == out_w here; semantically the two spatial axes are reversed,
# so the final result is the spatial transpose of the true convolution output
# (conv[b, f, j, i] instead of conv[b, f, i, j]) -- hence the large mismatch.
patches = patches.transpose(1,3)
# get weights to shape [8, 1, 1, 1, 3, 5, 5] by adding additional dimensions
torch_w_reshaped = torch_w.unsqueeze(1).unsqueeze(1).unsqueeze(1) 
# element wise multiplication 
# [64, 28, 28, 3, 5, 5] * [8, 1, 1, 1, 3, 5, 5] 
#--> [8, 64, 28, 28, **3, 5, 5**] 
# --> sum --> [8, 64, 28, 28]
# --> transpose --> [64, 8, 28, 28]  (but with the two spatial axes swapped)
vect_output = torch.mul(patches, torch_w_reshaped).sum(dim=(-1,-2,-3)).transpose(0,1)

When I check, these 3 results are different from each other.

# L2 (Frobenius) distance between each pair of results.
# Loop vs. library: tiny relative to the magnitude of the values -- float32 noise.
torch.sum((out_image - torch_conv_out)**2)**0.5 #5.5601
# Vectorized vs. the other two: huge -- the vectorized version is wrong.
torch.sum((vect_output - out_image)**2)**0.5 #83723.3828
torch.sum((vect_output - torch_conv_out)**2)**0.5 #83723.3906

Why are these results different from each other? Could you kindly help me understand what I am doing wrong?

The first comparison looks alright as the relative error is in the expected range (~1e-7) for float32. You can use integer weights to verify it:

# Overwrite the random float weights with small integers so exact-match
# comparisons are free of float rounding error.
# NOTE: torch.randint's upper bound is exclusive, so (-1, 2) draws from
# {-1, 0, 1}; the original (-1, 1) could only ever produce {-1, 0}.
with torch.no_grad():
    conv.weight.copy_(torch.randint(-1, 2, (conv.weight.nelement(),)).view_as(conv.weight))

The unfold approach is wrong; below is a corrected example using a manual unfold approach.
Fixing it yields no mismatches:

# Unfold height then width: [B, C, out_h, out_w, K, K]
patches = x.unfold(2, K, stride).unfold(3, K, stride)
# Flatten the spatial grid: [B, C, L, K, K] with L = out_h*out_w
patches = patches.contiguous().view(B, C, -1, K, K)
# Move channels next to the kernel dims: [B, L, C, K, K]
patches = patches.permute(0, 2, 1, 3, 4)

# Broadcast patches [B, L, 1, C, K, K] against weights [1, 1, C_out, C, K, K]
# and reduce over (C, K, K): one scalar per (batch, position, filter).
# (The original also built an unused torch_w_reshaped here; removed.)
vect_output = torch.mul(patches.unsqueeze(2), torch_w.unsqueeze(0).unsqueeze(1)).sum(dim=(-1,-2,-3))
vect_output = vect_output.permute(0, 2, 1)  # [B, C_out, L]
vect_output = vect_output.view(B, -1, out_image.size(2), out_image.size(3))  # [B, C_out, out_h, out_w]

# With the corrected vectorized implementation (and integer weights),
# all three results agree exactly -- every pairwise distance is zero.
torch.sum((out_image - torch_conv_out)**2)**0.5 # tensor(0., grad_fn=<PowBackward0>)
torch.sum((vect_output - out_image)**2)**0.5 # tensor(0.)
torch.sum((vect_output - torch_conv_out)**2)**0.5 # tensor(0., grad_fn=<PowBackward0>)