Memory leak when using h5 data

Hi, I have multiple h5 files that contain ~1000 samples each. Here are my dataloader, sampler, and dataset:

import glob, multiprocessing, os, random
import Queue as queue  # Python 2; on Python 3 this is "import queue"
import h5py
import torch
from torch.utils.data import BatchSampler, DataLoader, Dataset


class h5_dataset(object):
    def __init__(self, h5_path):
        self.h5_path = h5_path
        self.h5 = None

    def __getitem__(self, idx):
        # Open lazily so each DataLoader worker process gets its own h5py handle.
        if self.h5 is None:
            self.h5 = h5py.File(self.h5_path, 'r')

        data = self.h5['dataset'][idx]
        return data

class CustomDataset(Dataset):

    def __init__(self, h5_path_list):
        self.data_path_list = h5_path_list
        self.h5_dataset = []
        for path in self.data_path_list:
            self.h5_dataset.append(h5_dataset(path))

    def __getitem__(self, idx_tuple):
        # The sampler yields (file_idx, sample_idx) pairs.
        assert len(idx_tuple) == 2
        file_idx, idx = idx_tuple
        data = self.h5_dataset[file_idx][idx]
        return data
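
The sampler below also calls __len__(), get_obj_num() and get_sample_num() on the dataset; those helpers are omitted here. Roughly (a sketch, not the exact code), they just report the number of files and the per-file sample counts:

    # inside CustomDataset (sketch):
    def get_obj_num(self):
        # Number of h5 files ("objects").
        return len(self.h5_dataset)

    def get_sample_num(self, file_idx):
        # Number of samples stored in one h5 file.
        with h5py.File(self.data_path_list[file_idx], 'r') as f:
            return f['dataset'].shape[0]

    def __len__(self):
        # Total number of samples across all files.
        return sum(self.get_sample_num(i) for i in range(self.get_obj_num()))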

class RandInObjSampler(BatchSampler):
   
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size
        self.obj_num = self.dataset.get_obj_num()
        self.queue = queue.Queue()
        self.remain_obj = None

    def __len__(self):
        return len(self.dataset) // self.batch_size

    def __iter__(self):
        self.remain_obj = list(range(self.obj_num))
        for _ in range(len(self)):
            if self.queue.qsize() < self.batch_size:
                # Pick a random next obj & enqueue its permuted indices.
                random.shuffle(self.remain_obj)

                next_obj = self.remain_obj[0]
                self.remain_obj = self.remain_obj[1:]
                next_obj_sample_num = self.dataset.get_sample_num(next_obj)
                for sample_idx in permutation_in_obj_index(next_obj, next_obj_sample_num):
                    self.queue.put(sample_idx)

            # Pull a batch-sized list of (file_idx, sample_idx) items from the queue.
            items_idx = [self.queue.get() for _ in range(self.batch_size)]
            yield items_idx
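
permutation_in_obj_index() is a small helper that is not shown above; it just returns the shuffled (file_idx, sample_idx) pairs covering one file, roughly:

def permutation_in_obj_index(file_idx, sample_num):
    # Sketch of the omitted helper: shuffled (file_idx, sample_idx) pairs
    # for every sample of one h5 file.
    order = list(range(sample_num))
    random.shuffle(order)
    return [(file_idx, i) for i in order]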



class FixLenDataloader(DataLoader):
    def __init__(self, dataset, batch_size, pin_memory, num_workers):
        batch_sampler = RandInObjSampler(dataset, batch_size)
        super(FixLenDataloader, self).__init__(
            dataset=dataset,
            batch_sampler=batch_sampler,
            pin_memory=pin_memory,
            num_workers=num_workers)

The issue appears when I just try to load the data with this script:

h5_folder = "/path/to/h5"
h5_list = glob.glob(os.path.join(h5_folder, "*.h5"))

dataset = CustomDataset(h5_list)
dataloader = FixLenDataloader(
    dataset,
    batch_size=128,
    pin_memory=torch.cuda.is_available(),
    num_workers=multiprocessing.cpu_count())
for i in range(10):
    print "num_epoch", i
    for batch_idx, data in enumerate(dataloader):
        if batch_idx % 100 == 0:
            print('succees on batch {}'.format(batch_idx))
            memReport()
            cpuStats()

I am monitoring the memory with the memReport()/cpuStats() scripts from the thread "How pytorch releases variable garbage?".
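
For reference, those monitoring helpers look roughly like this (a sketch assuming psutil is installed; see the linked thread for the exact code):

import gc
import os
import sys
import psutil
import torch

def memReport():
    # List every live torch tensor the garbage collector still tracks.
    for obj in gc.get_objects():
        if torch.is_tensor(obj):
            print(type(obj), obj.size())

def cpuStats():
    print(sys.version)
    print(psutil.cpu_percent())
    print(psutil.virtual_memory())   # system-wide memory stats (svmem)
    memory_use = psutil.Process(os.getpid()).memory_info()[0] / 2. ** 30
    print('memory GB:', memory_use)  # resident memory of this process in GB
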
By around batch 2000, CPU memory has grown from 4.3 GB to 6.1 GB, and the code also fails around batch 2000 when I am actually training the model. Any ideas? Here is the memory trace:

2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
25.2
svmem(total=269832167424, available=200663445504, percent=25.6, used=59130613760, free=8998412288, active=143371280384, inactive=85837078528, buffers=2089795584, cached=199613345792, shared=9689022464)
('memory GB:', 4.2747650146484375)
succees on batch 1500
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
32.0
svmem(total=269832167424, available=200383332352, percent=25.7, used=59333599232, free=9493114880, active=143184515072, inactive=85609123840, buffers=2089795584, cached=198915657728, shared=9768665088)
('memory GB:', 4.318531036376953)
succees on batch 1600
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
31.0
svmem(total=269832167424, available=195722665984, percent=27.5, used=64067747840, free=5217337344, active=147790471168, inactive=85312073728, buffers=2089795584, cached=198457286656, shared=9693081600)
('memory GB:', 4.31866455078125)
succees on batch 1700
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
38.1
svmem(total=269832167424, available=198720540672, percent=26.4, used=58239537152, free=12243107840, active=140344602624, inactive=86244818944, buffers=2089799680, cached=197259722752, shared=12520136704)
('memory GB:', 6.1643524169921875)
succees on batch 1800
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
23.9
svmem(total=269832167424, available=195888242688, percent=27.4, used=61950873600, free=8822206464, active=144383623168, inactive=85638770688, buffers=2089803776, cached=196969283584, shared=11642433536)
('memory GB:', 6.0920257568359375)
succees on batch 1900
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
33.9
svmem(total=269832167424, available=201348345856, percent=25.4, used=56451174400, free=14635212800, active=138813337600, inactive=85438504960, buffers=2089807872, cached=196655972352, shared=11682168832)
('memory GB:', 6.161582946777344)
succees on batch 2000
2.7.6 (default, Nov 13 2018, 12:45:42) 
[GCC 4.8.4]
23.7
svmem(total=269832167424, available=195374772224, percent=27.6, used=62412275712, free=9571815424, active=144475328512, inactive=84977864704, buffers=2089807872, cached=195758268416, shared=11695505408)
('memory GB:', 6.161590576171875)
Write failed: Broken pipe

Facing the same issue currently. Can you shed light on what worked for you?