I define a kind of sampler by myself, and I want to training the model on distributed system. I do not know how to modify the code to adopt to distributed system. The following is the code of sampler.
import torch
import numpy as np
class CategoriesSampler():
def __init__(self, label, n_batch, n_cls, n_per):
self.n_batch = n_batch
self.n_cls = n_cls
self.n_per = n_per
label = np.array(label)
self.m_ind = []
for i in range(max(label) + 1):
ind = np.argwhere(label == i).reshape(-1)
ind = torch.from_numpy(ind)
self.m_ind.append(ind)
def __len__(self):
return self.n_batch
def __iter__(self):
for i_batch in range(self.n_batch):
batch = []
# print(len(self.m_ind))
classes = torch.randperm(len(self.m_ind))[:self.n_cls]
for c in classes:
l = self.m_ind[c]
pos = torch.randperm(len(l))[:self.n_per]
batch.append(l[pos])
# else:
# print(c)
batch = torch.stack(batch).t().reshape(-1)
yield batch