How to combine datasets to return image and numpy file simultaneously

I am trying to build a dataloader that will take images and poses. The images are saved in the form of .jpg files, and the poses in the form of .npy files. The images and poses are in different folders but have the same sub-folder structure and name. The sub-folders are in the form of classes, i.e., each class has a corresponding folder. I want to apply image transformations and then return the images (for which I am using torchvision datasets.ImageFolder). For the poses, I am using torchvision datasets.DatasetFolder. How do I combine these two datasets so that I get both pose and image of the same name simultaneously?

Hi,

It’s very easy to write a custom data loader for your case. A great example is given here.

Thanks for the reply! But this didn’t work due to the sub-folder structure. The sub-folders are in the form of classes, i.e., each class has a corresponding folder.

Hi,
If you insist on using torchvision's datasets.ImageFolder and datasets.DatasetFolder, you need to inherit from both the ImageFolder and DatasetFolder classes and define a new __getitem__ method that returns both the pose and the image.

Thank you for your response. I am still not very clear on how to go about it though. These are my two classes for images and poses. How should the __init__ and __getitem__ methods of the new class be defined? I also need the same value of rand for both the classes.

class ReIDFolder_images(datasets.ImageFolder):
    """Image folder dataset whose items carry an extra same-class image.

    Each item is ``(sample, target, pos)``: the image at ``index``, its
    class index, and a randomly chosen other image with the same class
    index (or the same image again when the class has no other member).
    """

    def __init__(self, root, transform):
        """Index ``root`` via the parent class and cache labels as an array.

        Args:
            root: Dataset root directory, forwarded to the parent class.
            transform: Image transform, forwarded to the parent class.
        """
        super().__init__(root, transform)
        # Keep the labels as an ndarray so class membership can be tested
        # with element-wise comparisons in the sampling helpers.
        self.targets = np.asarray([label for _, label in self.samples])
        self.img_num = len(self.samples)
        print(self.img_num)

    def _get_cam_id(self, path):
        """Return a zero-based id parsed from the file name of ``path``.

        The id is the single character that follows the first ``'c'`` in
        the base name, converted to ``int`` and decremented by one.
        NOTE(review): presumably this is a camera number embedded in the
        file name -- confirm against the dataset's naming scheme.
        """
        base_name = os.path.basename(path)
        after_first_c = base_name.split('c')[1]
        return int(after_first_c[0]) - 1

    def _get_pos_sample(self, target, index, path):
        """Return the path of a random same-class sample other than ``index``.

        Args:
            target: Class index to match against ``self.targets``.
            index: Position of the anchor sample, excluded from the draw.
            path: Anchor path, returned when no other candidate exists.
        """
        same_class = np.argwhere(self.targets == target).flatten()
        candidates = np.setdiff1d(same_class, index)
        if len(candidates) == 0:  # in the query set, only one sample
            return path
        pick = random.randint(0, len(candidates) - 1)
        return self.samples[candidates[pick]][0]

    def _get_neg_sample(self, target):
        """Return a random ``self.samples`` entry whose class differs.

        Unlike ``_get_pos_sample`` this returns the whole entry, not only
        its path.
        """
        other_class = np.argwhere(self.targets != target).flatten()
        pick = random.randint(0, len(other_class) - 1)
        return self.samples[other_class[pick]]

    def __getitem__(self, index):
        """Return ``(sample, target, pos)`` for the item at ``index``."""
        path, target = self.samples[index]
        anchor = self.loader(path)
        positive = self.loader(self._get_pos_sample(target, index, path))

        transform = self.transform
        if transform is not None:
            # Anchor first, then positive, matching the load order.
            anchor, positive = transform(anchor), transform(positive)

        label_transform = self.target_transform
        if label_transform is not None:
            target = label_transform(target)

        return anchor, target, positive


class ReIDFolder_poses(datasets.DatasetFolder):
    """Folder dataset of ``.npy`` files whose items carry a same-class extra.

    Each item is ``(sample, target, pos)``: the tensor loaded from the
    file at ``index``, its class index, and the tensor of a randomly
    chosen other file with the same class index (or the same file again
    when the class has no other member).
    """

    def __init__(self, root):
        """Index the ``.npy`` files under ``root`` and cache labels as an array.

        Args:
            root: Dataset root directory, forwarded to the parent class.
        """
        super().__init__(root, loader=self.npy_loader, extensions='.npy')

        # Keep the labels as an ndarray so class membership can be tested
        # with element-wise comparisons in the sampling helpers.
        self.targets = np.asarray([label for _, label in self.samples])
        self.img_num = len(self.samples)
        print(self.img_num)

    def npy_loader(self, path):
        """Load the array stored at ``path`` and wrap it in a ``torch.Tensor``."""
        array = np.load(path)
        return torch.Tensor(array)

    def _get_cam_id(self, path):
        """Return a zero-based id parsed from the file name of ``path``.

        The id is the single character that follows the first ``'c'`` in
        the base name, converted to ``int`` and decremented by one.
        NOTE(review): presumably this is a camera number embedded in the
        file name -- confirm against the dataset's naming scheme.
        """
        base_name = os.path.basename(path)
        after_first_c = base_name.split('c')[1]
        return int(after_first_c[0]) - 1

    def _get_pos_sample(self, target, index, path):
        """Return the path of a random same-class sample other than ``index``.

        Args:
            target: Class index to match against ``self.targets``.
            index: Position of the anchor sample, excluded from the draw.
            path: Anchor path, returned when no other candidate exists.
        """
        same_class = np.argwhere(self.targets == target).flatten()
        candidates = np.setdiff1d(same_class, index)
        if len(candidates) == 0:  # in the query set, only one sample
            return path
        pick = random.randint(0, len(candidates) - 1)
        return self.samples[candidates[pick]][0]

    def _get_neg_sample(self, target):
        """Return a random ``self.samples`` entry whose class differs.

        Unlike ``_get_pos_sample`` this returns the whole entry, not only
        its path.
        """
        other_class = np.argwhere(self.targets != target).flatten()
        pick = random.randint(0, len(other_class) - 1)
        return self.samples[other_class[pick]]

    def __getitem__(self, index):
        """Return ``(sample, target, pos)`` for the item at ``index``."""
        path, target = self.samples[index]
        anchor = self.loader(path)
        positive = self.loader(self._get_pos_sample(target, index, path))
        return anchor, target, positive

I was able to solve this problem! It turns out I didn’t have to inherit datasets.DatasetFolder. Since the labels were the same, I just created one class which inherits datasets.ImageFolder, and fed a modified path to the function npy_loader.