Cannot close the hdf5 in dataloader?

I have a class to load hdf5 likes

class H5Loader(data.Dataset):

    def __init__(self):
        self.hdf5_files = ['1.h5', '2.h5', '3.h5']
        self.data_lst = []
        for ind in range (len(self.hdf5_list)):
            h5_file = h5py.File(self.hdf5_list[ind])
            data_ = h5_file.get('data')
            self.data_lst.append(data_)
            #h5_file.close()
            
    def __getitem__(self, index):
        self.data = np.asarray(self.data_lst[index])        
        return torch.from_numpy(self.data).float()

    def __len__(self):
        return len(self.hdf5_list)

It worked. However, the hdf5 does not close after __init__ function. Hence, I closed it using h5_file.close() (uncommend line above). But it show the error

  File "h5loader.py", line 30, in __getitem__
   self.data = np.asarray(self.data_lst[index])      
  File "/home/john/anaconda3/lib/python3.6/site-packages/numpy/core/numeric.py", line 501, in asarray
    return array(a, dtype, copy=False, order=order)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/john/anaconda3/lib/python3.6/site-packages/h5py/_hl/dataset.py", line 690, in __array__
    arr = numpy.empty(self.shape, dtype=self.dtype if dtype is None else dtype)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/john/anaconda3/lib/python3.6/site-packages/h5py/_hl/dataset.py", line 225, in shape
    return self.id.shape
  File "h5py/h5d.pyx", line 131, in h5py.h5d.DatasetID.shape.__get__
  File "h5py/h5d.pyx", line 132, in h5py.h5d.DatasetID.shape.__get__
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5d.pyx", line 288, in h5py.h5d.DatasetID.get_space
ValueError: Not a dataset (not a dataset)

Any suggestion to close h5 file?

Hi,

include a __del__ function.

class H5Loader(data.Dataset):

    def __init__(self):
        self.hdf5_files = ['1.h5', '2.h5', '3.h5']
        self.data_lst = []
        for ind in range (len(self.hdf5_list)):
            self.h5_file = h5py.File(self.hdf5_list[ind])
            data_ = self.h5_file.get('data')
            self.data_lst.append(data_)
            #h5_file.close()
            
    def __getitem__(self, index):
        self.data = np.asarray(self.data_lst[index])        
        return torch.from_numpy(self.data).float()

    def __len__(self):
        return len(self.hdf5_list)

    def __del__(self):
        self.h5_file.close()

but see also this problem with hdf5.

Regards,
Foivos

I tried it and it does not work,. It cannot find self.h5_file in the del function

Hi John,

first the syntax is def __del__(self): self.h5_file.close(). Second, are you sure you defined the self.h5_file in the def __init__(...)?

It works in my tests. In fact, it should work without subclassing data.Dataset. Your problem sounds like you didn’t incluce the self. prefix when you defined the h5_file in your init function?

Regards,
Foivos

No, I included it. This is full error. Do you use number of num_workers 1 or 0. I used 4

  File "/home/join/anaconda3/lib/python3.6/site-packages/h5py/_hl/files.py", line 167, in make_fid
    fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5f.pyx", line 98, in h5py.h5f.create
OSError: Unable to create file (unable to open file: name = '.a.h5', errno = 17, error message = 'File exists', flags = 15, o_flags = c2)
Exception ignored in: <bound method H5Dataset.__del__ of <dataloader.H5Dataset object at 0x7f01bc4d4940>>
Traceback (most recent call last):
  File "loader.py", line 70, in __del__
    self.h5_file.close()
AttributeError: 'H5Dataset' object has no attribute 'h5_file'

You have some confussion on the syntax in your code. Can you try below code.

class H5Loader(data.Dataset):

    def __init__(self):
        self.hdf5_files = ['1.h5', '2.h5', '3.h5']
        self.data_lst = []
        for ind in self.hdf5_files:
            h5_file = h5py.File(ind)
            data_ = h5_file.get('data')
            self.data_lst.append(data_)
            h5_file.close()

@otutay: I tried to close in init but it cannot work.

Well, you missed something when calling h5py.
Use h5py.File(self.hdf5_list[ind], 'r'). if you want to open file and read data.

I tried below code and it works perfectly.

import h5py
class H5Loader():

    def __init__(self):
        self.hdf5_files = ['A.hdf5', 'B.hdf5']
        self.data_lst = []
        for ind in self.hdf5_files:
            h5_file = h5py.File(ind)
            data_ = h5_file.get('data')
            self.data_lst.append(data_)
            h5_file.close()


if __name__=="__main__":

    hdfDummy = H5Loader()

@Foivos_Diakogiannis @otutay and @AlbertZhang Please look at the issue. I have make a completed bug in the Completed code with bug report for hdf5 dataset. How to fix?