Along with DistributedSampler, what if I wanted to pass another sampler, like the one below, to DataLoader?
class PneumoSampler(Sampler):
    """Yield every positive row plus a fresh random draw of negative rows.

    Rows of ``train_df`` with ``has_mask == 1`` are treated as positive and
    all other rows as negative. Each pass over the sampler yields all
    positive index labels together with ``n_negative`` negative labels
    drawn at random, shuffled together, so that positives make up roughly
    ``positive_perc`` of the yielded indices.

    Args:
        train_df: DataFrame with a ``has_mask`` column. The values yielded
            are labels from ``train_df.index`` (not row positions), so the
            dataset is presumably addressed by those labels -- verify
            against the Dataset implementation.
        positive_perc: Target fraction of positives per pass; must satisfy
            ``0 < positive_perc <= 1``.

    Raises:
        ValueError: If ``positive_perc`` is outside ``(0, 1]``.
    """

    def __init__(self, train_df, positive_perc=0.8):
        # Explicit check instead of ``assert``: asserts vanish under
        # ``python -O``.  The upper bound matters as well, because a value
        # above 1 would make ``n_negative`` negative and break both
        # ``__len__`` and ``__iter__``.
        if not 0 < positive_perc <= 1:
            raise ValueError(
                'percentage of positive pneumothorax images must be '
                f'greater than zero and at most one, got {positive_perc!r}'
            )
        self.train_df = train_df
        self.positive_perc = positive_perc
        self.positive_idxs = self.train_df.query('has_mask==1').index.values
        self.negative_idxs = self.train_df.query('has_mask!=1').index.values
        self.n_positive = len(self.positive_idxs)
        # Solve n_pos / (n_pos + n_neg) == positive_perc for n_neg,
        # truncating towards zero.
        self.n_negative = int(
            self.n_positive * (1 - self.positive_perc) / self.positive_perc
        )

    def __iter__(self):
        """Return an iterator over one shuffled pass of index labels."""
        # np.random.choice samples with replacement by default, so the same
        # negative label may appear more than once within a pass.
        negative_sample = np.random.choice(
            self.negative_idxs, size=self.n_negative
        )
        shuffled = np.random.permutation(
            np.hstack((negative_sample, self.positive_idxs))
        )
        return iter(shuffled.tolist())

    def __len__(self) -> int:
        """Return the number of indices yielded per pass."""
        return self.n_positive + self.n_negative
The sampler is taken from here.