About Normalization using pre-trained vgg16 networks

Good answer. Thank you

1 Like

Sorry for reviving this thread, but your current approach gives a high error in the estimation of std, if I add an offset to the random data:



class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(1000, 3, 24, 24) + 10000
        
    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)

def online_mean_and_sd(loader):
    """Compute the mean and sd in an online fashion

        Var[x] = E[X^2] - E^2[X]
    """
    cnt = 0
    fst_moment = torch.empty(3)
    snd_moment = torch.empty(3)

    for data in loader:

        b, c, h, w = data.shape
        nb_pixels = b * h * w
        sum_ = torch.sum(data, dim=[0, 2, 3])
        sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3])
        fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
        snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)

        cnt += nb_pixels

    return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2)

    

dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=1,
    num_workers=1,
    shuffle=False
)

mean, std = online_mean_and_sd(loader)
print(mean, dataset.data.mean([0, 2, 3]))
> tensor([10000.0039,  9999.9990, 10000.0020]) tensor([10000.0010, 10000.0000, 10000.0000])
print(std, dataset.data.std([0, 2, 3]))
> tensor([15.4919, 18.7617, 16.7332]) tensor([0.9995, 0.9994, 1.0010])
1 Like

Correct. I’ve tested on CIFAR10 and the mean of pixel stds is [51.6, 50.8, 51.2] while the pixel std is [63.0, 62.1, 66.7], which is quite different.

The code below lacks optimization but gives the correct std:

    pixel_mean = np.zeros(3)
    pixel_std = np.zeros(3)
    k = 1
    for image, _ in tqdm(dataset, "Computing mean/std", len(dataset), unit="samples"):
        image = np.array(image)
        pixels = image.reshape((-1, image.shape[2]))

        for pixel in pixels:
            diff = pixel - pixel_mean
            pixel_mean += diff / k
            pixel_std += diff * (pixel - pixel_mean)
            k += 1

    pixel_std = np.sqrt(pixel_std / (k - 2))
    print(pixel_mean)
    print(pixel_std)

Adapted from this SO answer.

4 Likes

This works for me.
I adjust your answer. Thanks a lot

pixel_mean = np.zeros(1)
pixel_std = np.zeros(1)
k = 1
for i_batch, sample_batched in enumerate(dataloader):
    image = np.array(sample_batched['image'])
    pixels = image.reshape((-1, image.shape[1]))
    for pixel in pixels:
        diff = pixel - pixel_mean
        pixel_mean = pixel_mean + diff / k
        pixel_std = pixel_std + diff * (pixel - pixel_mean)
        k += 1
pixel_std = np.sqrt(pixel_std / (k - 2))
print(pixel_mean)# [5.50180734]
print(pixel_std)#[8.27773514]

Computation of std is wrong! std dev of dataset is NOT equal to avg of std dev of batches. Authors needs to edit their answers with “Do not use this!” warning.

3 Likes

Hi @ptrblck,
std can’t be added and then averaged to get the overall std, please have a look at this,https://stackoverflow.com/a/60803379/8063334

1 Like

Yes, @sytelus mentioned this already, so please post the correct version here. :slight_smile:

If you are looking for mean and std statistics for each channel of the dataset, instead of just the mean and std, I created the solution below. The code is mainly an extension of ptrblck’s code above

class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(100, 3, 24, 24)
        
    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)
    
dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=10,
    num_workers=1,
    shuffle=False
)


nb_samples = 0.
channel_mean = torch.Tensor([0., 0., 0.])
channel_std = torch.Tensor([0., 0., 0.])
for images in tqdm_notebook(loader):
    # scale image to be between 0 and 1 
    images=images/255.
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1)*images.size(2), 3)
    for i in range(3):
        channel_mean[i]+=images[:, :,i].mean(1).sum(0)
        channel_std[i]+=images[:, :,i].std(1).sum(0)
    nb_samples += batch_samples

channel_mean /= nb_samples
channel_std /= nb_samples

I had a quick question about mean and std calculation for my custom dataset( around 5.5k train images). Now since these values depend on the batch size, if I were to change the batch_size (as a hyperparmeter), the mean and std need to be calculated each time the batch size changes? Since I haven’t done like this for CIFAR-10 or Imagenet

image normalization here isn’t based on batch statistics but dateset statistics. You calculate the mean and std of your entire dateset and use that to normalize each sample of the batch

1 Like

Here you go, didn’t test it rigorously, but it gave the right values for a few test values:

class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.zeros(100, 3, 24, 24)
        self.data[:,0:1,:,:] = 0.
        self.data[:,1:2,:,:] = 1.
        self.data[:,2:3,:,:] = torch.arange(0.,24., step = 1)
        
    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)
    

dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=10,
    num_workers=0,
    shuffle=False
)

mean = 0.
nb_samples = 0.
for data in loader:
    batch_samples = data.size(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples

temp = 0.
nb_samples = 0.
for data in loader:
    batch_samples = data.size(0)
    elementNum = data.size(0) * data.size(2) * data.size(3)
    data = data.permute(1,0,2,3).reshape(3, elementNum)
    temp += ((data - mean.repeat(elementNum,1).permute(1,0))**2).sum(1)/(elementNum*batch_samples)
    nb_samples += batch_samples

std = torch.sqrt(temp/nb_samples)
print(mean)
print(std)

By the way, pretty much all top google results that come when googling how to calculate the mean and std with pytorch point to your answer, having it wrong can really spread misinformation, especially for an easy to miss error like that.

Also, a lot of your answers here helped me a lot learning pytorch, so in case I don’t get another chance: thank you very much.

1 Like

Why do you step through the data in loader twice, once for mean and once for std? Wouldn’t it be quicker to calculate both at the same time?

Sure, you can go in one pass accumulating the total number of samples, the total sum, and the total sum of squares, then when you get these you can use them to get the mean and the std.

Thanks for your solution. The std computation still seems to be incorrect. I tried to re-implement it with a comparison with torch.mean and torch.std. I could get exactly the same results.

class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(1000, 3, 224, 224)

    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)

def main():
    device = torch.device("cuda")
    dataset = MyDataset()

    start = timeit.time.perf_counter()
    data = dataset.data.to(device)
    print("Mean:", torch.mean(data, dim=(0, 2, 3)))
    print("Std:", torch.std(data, dim=(0, 2, 3)))
    print("Elapsed time: %.3f seconds" % (timeit.time.perf_counter() - start))
    print()

    start = timeit.time.perf_counter()
    mean = 0.
    for data in dataset:
        data = data.to(device)
        mean += torch.mean(data, dim=(1, 2))
    mean /= len(dataset)
    print("Mean:", mean)

    temp = 0.
    nb_samples = 0.
    for data in dataset:
        data = data.to(device)
        temp += ((data.view(3, -1) - mean.unsqueeze(1)) ** 2).sum(dim=1)
        nb_samples += np.prod(data.size()[1:])
    std = torch.sqrt(temp/nb_samples)
    print("Std:", std)
    print("Elapsed time: %.3f seconds" % (timeit.time.perf_counter() - start))
1 Like

People finding this post, please be careful:

avg(std(minibatch_1) + std(minibatch_2) + .. ) != std(dataset)

Rather compute the avg(var(minibatch_1) + var(minibatch_2) + ..) and take its sqrt(..) as per SO link shared by @amit_js.

4 Likes

With the first approach (average of the std):
E[(sqrt(S_1) + sqrt(S_2) + … sqrt(S_n)) / n] = E[sqrt(S_1)] if the S_i are iid like in our case (E stay for expected value and S_i are the sample variances of each mini-batch).

E[sqrt(S_1)] <= sqrt(E[S_1]) = std(X) for the Jensen’s inequality.

With the second approach (sqrt of the average of the var):
E[sqrt((S_1 + S_2 + … S_n) / n)] = E[sqrt(S_tot)] <= sqrt(E[(S_tot)] = sqrt(var(X)) = std

So both approaches underestimate the real std (correct?).

I’m missing sometime? there is a way to demonstrate that the second approach is better than the first approach?

1 Like
mean = 0.
std = 0.
nb_samples = 0.
for data in dataloader:
    print(type(data))
    batch_samples = data.size(0)
    
    data.shape(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    std += data.std(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples

error is:

<class 'dict'>

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-51-e8ba3c8718bb> in <module>
      5 for data in dataloader:
      6     print(type(data))
----> 7     batch_samples = data.size(0)
      8 
      9     data.shape(0)

AttributeError: 'dict' object has no attribute 'size'

this is print(data) result:

{'image': tensor([[[[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
          [0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
          [0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
          ...,
          [0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
          [0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
          [0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]],

         [[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
          [0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
          [0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
          ...,
          [0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
          [0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
          [0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]],

         [[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
          [0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
          [0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
          ...,
          [0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
          [0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
          [0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]]],


        [[[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
          [0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
          [0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
          ...,
          [0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
          [0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
          [0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]],

         [[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
          [0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
          [0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
          ...,
          [0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
          [0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
          [0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]],

         [[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
          [0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
          [0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
          ...,
          [0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
          [0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
          [0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]]],


        [[[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
          [0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
          [0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
          ...,
          [0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
          [0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
          [0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]],

         [[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
          [0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
          [0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
          ...,
          [0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
          [0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
          [0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]],

         [[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
          [0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
          [0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
          ...,
          [0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
          [0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
          [0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]]]],
       dtype=torch.float64), 'landmarks': tensor([[[160.2964,  98.7339],
         [223.0788,  72.5067],
         [ 82.4163,  70.3733],
         [152.3213, 137.7867]],

        [[198.3194,  74.4341],
         [273.7188, 118.7733],
         [117.7113,  80.8000],
         [182.0750, 107.2533]],

        [[137.4789,  92.8523],
         [174.9463,  40.3467],
         [ 57.3013,  59.1200],
         [129.3375, 131.6533]]], dtype=torch.float64)}
dataloader = DataLoader(transformed_dataset, batch_size=3,
                        shuffle=True, num_workers=4)

and

transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
                                           root_dir='.',
                                           transform=transforms.Compose(
                                               [
                                               Rescale(256),
                                               RandomCrop(224),
                                               
                                               ToTensor()#,
                                               ##transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
                                               ##         std = [ 0.229, 0.224, 0.225 ])
                                               ]
                                                                        )
                                           )

and

class MothLandmarksDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': landmarks}

        if self.transform:
            sample = self.transform(sample)

        return sample

Your data tensor is a dict so you would need to access the image inside it.

1 Like

Hi @ptrblck,

I want to compute the mean and std deviation of the latent space of the autoencoders while training the autoencoders. Can you suggest a method for that?

Thanks,

I’m not sure if I understand the use case correctly, but you could use torch.mean and torch.std on the latent activation tensor during the forward pass.
If you want to calculate these stats for the latent tensors of the complete dataset, you could store these activations by returning them directly in the forward or via a forward hook and calculate the stats after the whole epoch is done.