Sure!
There are two functions: sampler_
and loader
, where the former is called by the latter
def sampler_(labels):
_, counts = np.unique(labels, return_counts=True)
weights = 1.0 / torch.tensor(counts, dtype=torch.float)
sample_weights = weights[labels]
sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)
return sampler
def loader(data_dir, transform, train_split=0.75):
images, labels, _ = parse_data(data_dir)
dataset = ImageDataset(imgages, labels, transform)
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.shuffle(indices) # shuffle the dataset before splitting into train and val
split = int(np.floor(train_split * dataset_size))
train_indices, val_indices = indices[:split], indices[split:]
train_labels = [labels[x] for x in train_indices]
val_labels = [labels[x] for x in val_indices]
train_sampler, val_sampler = sampler_(train_labels), sampler_(val_labels)
trainloader = DataLoader(dataset, sampler=train_sampler)
valloader = DataLoader(dataset, sampler=val_sampler)
return trainloader, valloader
for (feats, labels) in trainloader:
print(labels)
Output: tensor([5, 5, 5, 5, 6, 5, 5, 6, 8, 5, 6, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 5, 5, 5,
6, 5, 6, 5, 0, 5, 5, 6])
tensor([5, 5, 5, 5, 5, 6, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 6, 5, 5, 5])
tensor([5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5,
5, 6, 5, 5, 0, 5, 5, 5])
and so on (where 5 is the class in majority).
Please let me know your take on this.
Thanks!