Getting filenames in Custom DataLoader

I want to build a custom dataloader which will use the class name and the image name as a permanent index. How can I get the filename and folder name of the current image in __getitem__()?

class MyDataset(Dataset):
    """Wrap a ``datasets.ImageFolder`` and return each sample together with its index."""
    def __init__(self, root):
        """Store ``root`` and build the wrapped ``ImageFolder`` from it."""
        # Placeholder from the original post: other initialisation code was elided here.
        .......
        self.root = root
        # NOTE(review): ``transforms`` is not defined in this snippet; presumably a
        # transform pipeline created elsewhere -- confirm it is not the module itself.
        self.dataset = datasets.ImageFolder(root=self.root,
                                        transform=transforms)
    def __getitem__(self, index):
        """Return ``(data, target, index)`` for the sample at position ``index``."""
        # The wrapped dataset yields a (data, target) pair for the given index.
        data, target = self.dataset[index]
        index = index  # no-op self-assignment; the index is returned unchanged
        return data, target, index

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

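My dataset directory structure is as follows (one sub-folder per class, each containing that class's images):
[image of the directory tree not preserved]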

E.g., for image 0 of the cat class → index = cat0
I would appreciate any technique for extracting the parent folder name and the file name of the current index in __getitem__(self, index).

Thank you.

Hello,

You can get it from self.dataset.imgs, a list of (image path, class index) tuples in the same order as the dataset indices, like this:

import os
class MyDataset(Dataset):
    """Dataset wrapper around ``datasets.ImageFolder`` that also reports file locations.

    Every item is a dict holding the sample, its target, the requested index,
    and the directory and file name taken from the image's path.
    """

    def __init__(self, root):
        """Remember ``root`` and build the wrapped ``ImageFolder`` over it."""
        self.root = root
        self.dataset = datasets.ImageFolder(root=self.root, transform=transforms)

    def __len__(self):
        """Return how many samples the wrapped dataset holds."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return the sample at ``index`` as a dict.

        Keys: ``data`` and ``target`` (as produced by the wrapped dataset),
        ``index`` (the argument, unchanged), ``folder`` (directory part of the
        image path) and ``filename`` (final component of the image path).
        """
        sample, label = self.dataset[index]
        # The first element of the matching ``imgs`` entry is used as the image path.
        image_path = self.dataset.imgs[index][0]
        # os.path.split yields the same (dirname, basename) pair in a single call.
        parent_dir, file_name = os.path.split(image_path)
        return {
            "data": sample,
            "target": label,
            "index": index,
            "folder": parent_dir,
            "filename": file_name,
        }

# Build the dataset over the image root directory (placeholder path).
dataset = MyDataset("/path/to/data/")
# Fetch the first sample; in an interactive session this echoes the returned dict.
dataset[0]
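Output (note that 'folder' is the full directory path of the image; apply os.path.basename to it if you only need the class folder name, e.g. to build an index such as cat0):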
{'data': <PIL.Image.Image image mode=RGB size=1920x1080 at 0x2C35E87A8D0>,
 'target': 0,
 'index': 0,
 'folder': '/path/to/data',
 'filename': 'foo.png'}
1 Like

@Andrei_Cristea This worked. Thank you :pray: