# About Normalization using pre-trained vgg16 networks


Sorry for reviving this thread, but your current approach gives a large error in the estimate of `std` if I add an offset to the random data:

``````
import torch
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(1000, 3, 24, 24) + 10000

    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)


def online_mean_and_sd(loader):
    """Compute the mean and std in an online fashion.

    Var[X] = E[X^2] - E^2[X]
    """
    cnt = 0
    fst_moment = torch.empty(3)
    snd_moment = torch.empty(3)

    for data in loader:
        b, c, h, w = data.shape
        nb_pixels = b * h * w
        sum_ = torch.sum(data, dim=[0, 2, 3])
        sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3])
        fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
        snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)

        cnt += nb_pixels

    return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2)


dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=1,
    num_workers=1,
    shuffle=False
)

mean, std = online_mean_and_sd(loader)
print(mean, dataset.data.mean([0, 2, 3]))
> tensor([10000.0039,  9999.9990, 10000.0020]) tensor([10000.0010, 10000.0000, 10000.0000])
print(std, dataset.data.std([0, 2, 3]))
> tensor([15.4919, 18.7617, 16.7332]) tensor([0.9995, 0.9994, 1.0010])
``````

Correct. I’ve tested on CIFAR-10: the mean of the pixel stds is `[51.6, 50.8, 51.2]`, while the actual pixel std is `[63.0, 62.1, 66.7]`, which is quite different.

The code below lacks optimization but gives the correct std:

``````
import numpy as np
from tqdm import tqdm

pixel_mean = np.zeros(3)
pixel_std = np.zeros(3)
k = 1
for image, _ in tqdm(dataset, "Computing mean/std", len(dataset), unit="samples"):
    image = np.array(image)
    # flatten to (num_pixels, 3)
    pixels = image.reshape((-1, image.shape[2]))

    # Welford's online algorithm, one pixel at a time
    for pixel in pixels:
        diff = pixel - pixel_mean
        pixel_mean += diff / k
        pixel_std += diff * (pixel - pixel_mean)
        k += 1

pixel_std = np.sqrt(pixel_std / (k - 2))
print(pixel_mean)
print(pixel_std)
``````


This works for me.

``````
import numpy as np

pixel_mean = np.zeros(1)
pixel_std = np.zeros(1)
k = 1
# `sample_batched` is one batch dict yielded by the DataLoader
image = np.array(sample_batched['image'])
pixels = image.reshape((-1, image.shape[1]))
for pixel in pixels:
    diff = pixel - pixel_mean
    pixel_mean = pixel_mean + diff / k
    pixel_std = pixel_std + diff * (pixel - pixel_mean)
    k += 1
pixel_std = np.sqrt(pixel_std / (k - 2))
print(pixel_mean)  # [5.50180734]
print(pixel_std)   # [8.27773514]
``````

Computation of the std is wrong! The std dev of the dataset is NOT equal to the average of the std devs of the batches. The authors need to edit their answers with a “Do not use this!” warning.
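A quick numeric check of this claim, using hypothetical toy data (not from this thread): two batches drawn around different means each have a per-batch std of roughly 1, but the std of the combined data is much larger, so averaging the per-batch stds badly underestimates it.

``````
import torch

# two "minibatches" drawn around different means
b1 = torch.randn(1000)        # mean ~0, std ~1
b2 = torch.randn(1000) + 5.0  # mean ~5, std ~1
data = torch.cat([b1, b2])

print((b1.std() + b2.std()) / 2)  # ~1.0  (average of per-batch stds)
print(data.std())                 # ~2.7  (true std of the full dataset)
``````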


Hi @ptrblck,
per-batch stds can’t simply be averaged to get the overall std; please have a look at this: https://stackoverflow.com/a/60803379/8063334


Yes, @sytelus mentioned this already, so please post the correct version here.

If you are looking for per-channel mean and std statistics of the dataset, instead of just an overall mean and std, I created the solution below. The code is mainly an extension of ptrblck’s code above:

``````
class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(100, 3, 24, 24)

    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)


dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=10,
    num_workers=1,
    shuffle=False
)

nb_samples = 0.
channel_mean = torch.Tensor([0., 0., 0.])
channel_std = torch.Tensor([0., 0., 0.])
for images in loader:
    # scale images to be between 0 and 1
    images = images / 255.
    batch_samples = images.size(0)
    # reshape to (batch, pixels, channels)
    images = images.permute(0, 2, 3, 1).reshape(batch_samples, -1, 3)
    for i in range(3):
        channel_mean[i] += images[:, :, i].mean(1).sum(0)
        channel_std[i] += images[:, :, i].std(1).sum(0)
    nb_samples += batch_samples

channel_mean /= nb_samples
channel_std /= nb_samples
``````

I had a quick question about the mean and std calculation for my custom dataset (around 5.5k training images). Since these values depend on the batch size, if I were to change the batch_size (as a hyperparameter), would the mean and std need to be recalculated each time the batch size changes? I haven’t done that for CIFAR-10 or ImageNet.

Image normalization here isn’t based on batch statistics but on dataset statistics. You calculate the mean and std of your entire dataset and use them to normalize each sample of the batch, so they don’t need to be recomputed when the batch size changes.
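For instance, a minimal sketch of how the fixed, dataset-level statistics are typically applied per sample (the mean/std values below are just the well-known ImageNet numbers, used as placeholders for whatever you computed on your own dataset):

``````
import torchvision.transforms as transforms

# dataset-level statistics, computed once over the whole training set
dataset_mean = [0.485, 0.456, 0.406]
dataset_std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=dataset_mean, std=dataset_std),
])
``````

These numbers don’t change when the batch size changes, so they only need to be computed once.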


Here you go. I didn’t test it rigorously, but it gave the right values for a few test cases:

``````
class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.zeros(100, 3, 24, 24)
        self.data[:, 0:1, :, :] = 0.
        self.data[:, 1:2, :, :] = 1.
        self.data[:, 2:3, :, :] = torch.arange(0., 24., step=1)

    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)


dataset = MyDataset()
loader = DataLoader(
    dataset,
    batch_size=10,
    num_workers=0,
    shuffle=False
)

# first pass: per-channel mean
mean = 0.
nb_samples = 0.
for data in loader:
    batch_samples = data.size(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples

# second pass: per-channel std around the dataset mean
temp = 0.
nb_samples = 0.
for data in loader:
    batch_samples = data.size(0)
    elementNum = data.size(0) * data.size(2) * data.size(3)
    data = data.permute(1, 0, 2, 3).reshape(3, elementNum)
    temp += ((data - mean.repeat(elementNum, 1).permute(1, 0)) ** 2).sum(1) / (elementNum * batch_samples)
    nb_samples += batch_samples

std = torch.sqrt(temp / nb_samples)
print(mean)
print(std)
``````

By the way, pretty much all of the top Google results for how to calculate the mean and std with PyTorch point to your answer, so having it wrong can really spread misinformation, especially for an easy-to-miss error like that.

Also, a lot of your answers here have helped me a lot in learning PyTorch, so in case I don’t get another chance: thank you very much.


Why do you step through the data in loader twice, once for mean and once for std? Wouldn’t it be quicker to calculate both at the same time?

Sure, you can do it in one pass by accumulating the total number of samples, the total sum, and the total sum of squares; once you have these, you can use them to get the mean and the std.
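A minimal sketch of that one-pass approach, assuming `loader` yields image tensors of shape `[B, 3, H, W]`:

``````
import torch

n_pixels = 0
channel_sum = torch.zeros(3)
channel_sum_sq = torch.zeros(3)

for data in loader:
    n_pixels += data.size(0) * data.size(2) * data.size(3)
    channel_sum += data.sum(dim=[0, 2, 3])
    channel_sum_sq += (data ** 2).sum(dim=[0, 2, 3])

mean = channel_sum / n_pixels
std = torch.sqrt(channel_sum_sq / n_pixels - mean ** 2)
``````

Note that this is the Var[X] = E[X^2] - E^2[X] formulation, which, as shown earlier in the thread, can become numerically unstable when the data has a large offset.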

Thanks for your solution. The std computation still seems to be incorrect, though. I re-implemented it below with a comparison against `torch.mean` and `torch.std`, and my version gives exactly the same results as those.

``````
import timeit

import numpy as np
import torch
from torch.utils.data import Dataset


class MyDataset(Dataset):
    def __init__(self):
        self.data = torch.randn(1000, 3, 224, 224)

    def __getitem__(self, index):
        x = self.data[index]
        return x

    def __len__(self):
        return len(self.data)


def main():
    device = torch.device("cuda")
    dataset = MyDataset()

    # reference: stats over the whole tensor at once
    start = timeit.time.perf_counter()
    data = dataset.data.to(device)
    print("Mean:", torch.mean(data, dim=(0, 2, 3)))
    print("Std:", torch.std(data, dim=(0, 2, 3)))
    print("Elapsed time: %.3f seconds" % (timeit.time.perf_counter() - start))
    print()

    # two-pass computation: mean first, then std around that mean
    start = timeit.time.perf_counter()
    mean = 0.
    for data in dataset:
        data = data.to(device)
        mean += torch.mean(data, dim=(1, 2))
    mean /= len(dataset)
    print("Mean:", mean)

    temp = 0.
    nb_samples = 0.
    for data in dataset:
        data = data.to(device)
        temp += ((data.view(3, -1) - mean.unsqueeze(1)) ** 2).sum(dim=1)
        nb_samples += np.prod(data.size()[1:])
    std = torch.sqrt(temp / nb_samples)
    print("Std:", std)
    print("Elapsed time: %.3f seconds" % (timeit.time.perf_counter() - start))


if __name__ == "__main__":
    main()
``````

People finding this post, please be careful:

``````
avg(std(minibatch_1), std(minibatch_2), ...) != std(dataset)
``````

Rather, compute `avg(var(minibatch_1), var(minibatch_2), ...)` and take its `sqrt(...)`, as per the SO link shared by @amit_js.


With the first approach (average of the stds):
E[(sqrt(S_1) + sqrt(S_2) + … + sqrt(S_n)) / n] = E[sqrt(S_1)], since the S_i are i.i.d. in our case (E denotes the expected value and S_i is the sample variance of each mini-batch).

E[sqrt(S_1)] <= sqrt(E[S_1]) = std(X) by Jensen’s inequality.

With the second approach (sqrt of the average of the variances):
E[sqrt((S_1 + S_2 + … + S_n) / n)] = E[sqrt(S_tot)] <= sqrt(E[S_tot]) = sqrt(var(X)) = std(X)

So both approaches underestimate the real std (correct?).

Am I missing something? Is there a way to show that the second approach is better than the first?

``````
mean = 0.
std = 0.
nb_samples = 0.
for data in dataloader:
    print(type(data))
    batch_samples = data.size(0)

    data.shape(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    std += data.std(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples
``````

The error is:

``````<class 'dict'>

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-51-e8ba3c8718bb> in <module>
6     print(type(data))
----> 7     batch_samples = data.size(0)
8
9     data.shape(0)

AttributeError: 'dict' object has no attribute 'size'
``````

This is the `print(data)` result:

``````{'image': tensor([[[[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]],

[[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]],

[[0.2961, 0.2941, 0.2941,  ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980,  ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980,  ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216,  ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253,  ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216,  ..., 0.2471, 0.2452, 0.2431]]],

[[[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]],

[[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]],

[[0.3059, 0.3093, 0.3140,  ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165,  ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176,  ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931,  ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902,  ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863,  ..., 0.2510, 0.2510, 0.2510]]],

[[[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]],

[[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]],

[[0.2979, 0.2980, 0.3015,  ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980,  ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012,  ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294,  ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255,  ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255,  ..., 0.2216, 0.2235, 0.2223]]]],
dtype=torch.float64), 'landmarks': tensor([[[160.2964,  98.7339],
[223.0788,  72.5067],
[ 82.4163,  70.3733],
[152.3213, 137.7867]],

[[198.3194,  74.4341],
[273.7188, 118.7733],
[117.7113,  80.8000],
[182.0750, 107.2533]],

[[137.4789,  92.8523],
[174.9463,  40.3467],
[ 57.3013,  59.1200],
[129.3375, 131.6533]]], dtype=torch.float64)}
``````
``````
dataloader = DataLoader(transformed_dataset, batch_size=3,
                        shuffle=True, num_workers=4)
``````

and

``````
transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
                                           root_dir='.',
                                           transform=transforms.Compose([
                                               Rescale(256),
                                               RandomCrop(224),
                                               ToTensor()#,
                                               ##transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                               ##                     std=[0.229, 0.224, 0.225])
                                           ]))
``````

and

``````
class MothLandmarksDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)  # e.g. skimage.io.imread
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks])
        landmarks = landmarks.astype('float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': landmarks}

        if self.transform:
            sample = self.transform(sample)

        return sample
``````

Your `data` object is a `dict`, so you would need to access the `image` tensor inside it.
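A minimal sketch of that change, applied to the loop in the snippet above (the initialization of `mean`, `std`, and `nb_samples` stays the same; note the std part still has the averaging issue discussed earlier in the thread):

``````
for data in dataloader:
    images = data['image']  # the batch dict also contains 'landmarks'
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1), -1)
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)
    nb_samples += batch_samples
``````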


Hi @ptrblck,

I want to compute the mean and standard deviation of the latent space of an autoencoder while training it. Can you suggest a method for that?

Thanks,

I’m not sure if I understand the use case correctly, but you could use `torch.mean` and `torch.std` on the latent activation tensor during the forward pass.
If you want to calculate these stats for the latent tensors of the complete dataset, you could store these activations by returning them directly from `forward` or via a forward hook, and calculate the stats after the whole epoch is done.
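For the second option, a rough sketch using a forward hook; `model.encoder` is just a placeholder for whichever module produces the latent tensor in your autoencoder:

``````
import torch

latents = []

def save_latent(module, inp, out):
    # store the latent activations of each forward pass on the CPU
    latents.append(out.detach().cpu())

handle = model.encoder.register_forward_hook(save_latent)

with torch.no_grad():
    for data in loader:
        model(data)

handle.remove()
all_latents = torch.cat(latents, dim=0)
print(all_latents.mean(), all_latents.std())
``````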