After reading various posts about `WeightedRandomSampler` (some links are left as code comments), I’m unsure what to expect from the example below (PyTorch 1.3.1).

```
import numpy as np
import torch
from torch.utils.data import TensorDataset as dset
torch.manual_seed(42)
data_size = 15
num_classes = 3
batch_size = 4
inputs = torch.tensor(range(data_size))
print("inputs", inputs.shape, inputs)
if 0:
    targets = torch.floor(num_classes * torch.rand(data_size)).int()
else:
    targets = torch.tensor([1, 0, 1, 1, 0, 1, 0, 1, 1, 2, 2, 1, 0, 0, 1], dtype=torch.int32)
print("targets", targets.shape, targets)
trainDataset = dset(inputs, targets)
# https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/10
class_sample_count = np.array([len(np.where(targets==t)[0]) for t in np.unique(targets)])
print("class_sample_count", class_sample_count.shape, class_sample_count)
weights = 1. / class_sample_count
print("weights", weights.shape, weights)
# https://discuss.pytorch.org/t/some-problems-with-weightedrandomsampler/23242/2
samples_weights = weights[targets]
assert len(samples_weights) == len(targets)
if 0:
    print("samples_weights", samples_weights.shape, samples_weights)
sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weights, len(samples_weights), replacement=True)
trainLoader = torch.utils.data.DataLoader(dataset=trainDataset, batch_size=batch_size, sampler=sampler)
inputs_new = []
targets_new = []
for batch, (data, target) in enumerate(trainLoader):
    counts = [len(np.where(target.numpy() == class_sample)[0]) for class_sample in range(len(class_sample_count))]
    inputs_new.extend(data.data.numpy())
    targets_new.extend(target.data.numpy())
    print("batch {}, size {}, targets {}, counts: {}".format(batch, data.shape[0], target.data, counts))
print("inputs_new", inputs_new)
print("targets_new", targets_new)
print("class_sample_count_new", np.array([len(np.where(targets_new==t)[0]) for t in np.unique(targets_new)]))
```

Output

```
inputs torch.Size([15]) tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
targets torch.Size([15]) tensor([1, 0, 1, 1, 0, 1, 0, 1, 1, 2, 2, 1, 0, 0, 1], dtype=torch.int32)
class_sample_count (3,) [5 8 2]
weights (3,) [0.2 0.125 0.5 ]
batch 0, size 4, targets tensor([0, 1, 0, 2], dtype=torch.int32), counts: [2, 1, 1]
batch 1, size 4, targets tensor([2, 0, 0, 0], dtype=torch.int32), counts: [3, 0, 1]
batch 2, size 4, targets tensor([1, 1, 1, 0], dtype=torch.int32), counts: [1, 3, 0]
batch 3, size 3, targets tensor([0, 1, 2], dtype=torch.int32), counts: [1, 1, 1]
inputs_new [1, 2, 1, 9, 9, 13, 13, 1, 2, 3, 7, 12, 12, 7, 9]
targets_new [0, 1, 0, 2, 2, 0, 0, 0, 1, 1, 1, 0, 0, 1, 2]
class_sample_count_new [7 5 3]
```

Since each sample’s weight is the reciprocal of its class count, each class should be drawn with probability proportional to count × weight = 5·0.2 = 8·0.125 = 2·0.5 = 1, i.e. 1/3 per draw. So I would expect `class_sample_count_new` to be “more” balanced, closer to `[5 5 5]`. Is this a correct assumption?

I’ve also tried larger values of `data_size` and `batch_size`, and removed `manual_seed`, but the imbalance was still surprisingly large.
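
To check whether the sampler at least balances the classes in expectation, here is a rough sketch (reusing the `targets` and the same 1/class-count weighting from above; `num_epochs` is just an arbitrary repetition count I picked) that averages the per-class counts over many independently sampled epochs:

```
import torch

torch.manual_seed(42)
targets = torch.tensor([1, 0, 1, 1, 0, 1, 0, 1, 1, 2, 2, 1, 0, 0, 1])
class_sample_count = torch.bincount(targets)      # tensor([5, 8, 2])
samples_weights = (1.0 / class_sample_count.float())[targets]
sampler = torch.utils.data.WeightedRandomSampler(
    samples_weights, len(samples_weights), replacement=True)

num_epochs = 10000                                # arbitrary; more epochs -> tighter average
totals = torch.zeros(3)
for _ in range(num_epochs):
    idx = torch.tensor(list(sampler))             # one epoch's worth of sampled indices
    totals += torch.bincount(targets[idx], minlength=3).float()
print(totals / num_epochs)
```

If the weighting works the way I think it does, the printed average should land near `[5., 5., 5.]`, even though any single epoch can come out as skewed as the run above.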