F.l1_loss does not match NumPy

I tried to compare PyTorch's F.l1_loss with the equivalent NumPy computation and found that when the inputs and targets have dtype=float64 the results match, but when they are float32 they do not.

import unittest
import numpy as np
import torchvision
import torch.nn.functional as F
from PIL import Image

np.random.seed(1)


class TestL1loss(unittest.TestCase):

    @staticmethod
    def load_images():
        # example image from the facades dataset (link below)
        img = Image.open('1.jpg')

        # split the image into left and right halves
        img_width, img_height = img.size
        img_a = img.crop([0, 0, img_width//2, img_height])
        img_b = img.crop([img_width//2, 0, img_width, img_height])
        img_a.save('img_a.png')
        img_b.save('img_b.png')

        # ToTensor() returns float32 tensors scaled to [0, 1]
        tsfm = torchvision.transforms.ToTensor()
        return tsfm(img_a), tsfm(img_b)

    def test_l1_loss_float32(self):
        t_a, t_b = TestL1loss.load_images()

        # both float32; the default tolerance of 7 decimal places fails
        self.assertAlmostEqual(
            F.l1_loss(t_a, t_b, reduction='mean').item(),
            np.mean(np.abs(t_a.numpy() - t_b.numpy())))

        # a tolerance of 3 decimal places would pass
        self.assertAlmostEqual(
            F.l1_loss(t_a, t_b, reduction='mean').item(),
            np.mean(np.abs(t_a.numpy() - t_b.numpy())),
            places=3)

    def test_l1_loss_float64(self):
        t_a, t_b = TestL1loss.load_images()

        # both double/float64
        t_a_arr = t_a.numpy().astype(np.float64)
        t_b_arr = t_b.numpy().astype(np.float64)
        self.assertAlmostEqual(
            F.l1_loss(t_a.double(), t_b.double(), reduction='mean').item(),
            np.mean(np.abs(t_a_arr - t_b_arr)))

    def test_l1_loss_mixed(self):
        t_a, t_b = TestL1loss.load_images()

        # double tensors vs. float32 arrays
        self.assertAlmostEqual(
            F.l1_loss(t_a.double(), t_b.double(), reduction='mean').item(),
            np.mean(np.abs(t_a.numpy() - t_b.numpy())))

I got the example image from the facades dataset (http://cmp.felk.cvut.cz/~tylecr1/facade/).

The following output shows that two of the three tests fail:

F.F
======================================================================
FAIL: test_l1_loss_float32 (test_l1loss.TestL1loss)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/vahid/github/test_l1loss.py", line 31, in test_l1_loss_float32
    np.mean(np.abs(t_a.numpy() - t_b.numpy())))
AssertionError: 0.41515448689460754 != 0.41536176 within 7 places (0.0002072751522064209 difference)

======================================================================
FAIL: test_l1_loss_mixed (test_l1loss.TestL1loss)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/vahid/github/test_l1loss.py", line 54, in test_l1_loss_mixed
    np.mean(np.abs(t_a.numpy() - t_b.numpy())))
AssertionError: 0.41536183419634415 != 0.41536176 within 7 places (7.214953018364056e-08 difference)

----------------------------------------------------------------------
Ran 3 tests in 0.151s

FAILED (failures=2)

In the float32 case, the results only match up to 3 decimal places. The mixed test shows that when the tensors are double but the NumPy arrays remain float32, the results are very close (error ~7e-8). So NumPy's float32 mean appears to be nearly as accurate as PyTorch's double-precision result.
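
Here is a minimal sketch of what I think is going on (the array x is just a hypothetical stand-in for the per-pixel absolute differences): accumulating float32 values one by one drifts, while NumPy's float32 mean uses pairwise summation and stays close to a float64 reference.

import numpy as np

np.random.seed(1)

# hypothetical stand-in for the per-pixel absolute differences
x = np.random.rand(100000).astype(np.float32)

# reference mean, accumulated in float64
ref = x.astype(np.float64).mean()

# naive sequential float32 accumulation, as a simple reduction loop would do it
acc = np.float32(0.0)
for v in x:
    acc += v
naive = acc / np.float32(x.size)

print(abs(float(naive) - ref))     # larger error from sequential accumulation
print(abs(float(x.mean()) - ref))  # small error: np.mean uses pairwise summation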


Hi,

Can you give more information about the images you use to check this?

I tried with random tensors but I don't see the same discrepancy:

import torch
import torch.nn.functional as F
import numpy as np

a = torch.rand(250, 250)
b = torch.rand(250, 250)

F.l1_loss(a, b, reduction='mean').item() - np.mean(np.abs(a.numpy() - b.numpy()))
> 2.9802322387695312e-08

What can happen is that the reduction is imprecise if the values in your image are large.
Can you try comparing to (a - b).abs().mean() for torch as well?

Thanks @albanD for the suggestion.

I confirmed that the error with (a - b).abs().mean() is much smaller than with F.l1_loss:

Error with (a - b).abs().mean() => 2.086162567138672e-07
Error with F.l1_loss => 0.0002072751522064209
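
For reference, the errors can be measured with something along these lines (a sketch; t_a and t_b are the float32 tensors from load_images() above, and the float64 NumPy result serves as the reference):

# float64 NumPy result used as the reference value
ref = np.mean(np.abs(t_a.numpy().astype(np.float64) - t_b.numpy().astype(np.float64)))

err_mean = abs((t_a - t_b).abs().mean().item() - ref)               # explicit torch reduction
err_loss = abs(F.l1_loss(t_a, t_b, reduction='mean').item() - ref)  # F.l1_loss
print(err_mean, err_loss)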

I would guess that this is because l1_loss is still implemented in the old THNN library, which does the mean() reduction differently and is usually less precise.
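
If the imprecision matters in practice, one possible workaround (just a sketch, not an official fix) is to do the reduction in double precision and cast the result back:

# sketch: compute the loss in float64, then cast back to float32
loss = F.l1_loss(t_a.double(), t_b.double(), reduction='mean').float()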
