RuntimeError: Too many open files... Custom Dataloader


I’m unsure why my code crashes with the error "RuntimeError: Too many open files. Communication with the workers is no longer possible." It works when `__getitem__` returns only the string fields (id, damage, growth_stage, and the image path), but it fails as soon as I add the field extent (an integer). How can I resolve this, and why does it happen only with integers? Could this be a bug?

PyTorch 2.1.0.dev20230828+cu121
Pandas 2.0.3
PIL 9.3.0

Update: This error does not occur with PyTorch 1.13.0.

Thanks in advance

from import Dataset, DataLoader
from PIL import Image

import os
import random
import pandas as pd

class DroughtDamageDataset(Dataset):
    """Dataset built from the rows of a drought-damage CSV file.

    No image is opened by this class. A training item carries the labels
    of one CSV row together with the path of that row's image inside the
    per-size image directory.

    Args:
        df_path: Path of the CSV file to read. When the lower-cased path
            contains ``'train'`` the dataset is in train mode; otherwise
            it is in test mode.
        shapes: Target image size, given either as ``(size,)`` (expanded
            to ``(size, size)``) or as a pair.

    Raises:
        ValueError: If ``shapes`` does not contain one or two entries.
    """

    def __init__(self, df_path, shapes=(384,)):
        # Validate first so a bad size fails before any file access.
        if len(shapes) == 1:
            self.shapes = (shapes[0], shapes[0])
        elif len(shapes) == 2:
            self.shapes = shapes
        else:
            raise ValueError(
                f"shapes must have 1 or 2 entries, got {len(shapes)}"
            )

        self.df_path = df_path
        self.df = pd.read_csv(self.df_path)

        # The two modes are mutually exclusive; without the `else` the
        # test values would always overwrite the train values.
        if 'train' in self.df_path.lower():
            self.mode = 'train'
            self.root_path = '../dataset/train'
        else:
            self.mode = 'test'
            self.root_path = '../dataset/test'

        self.save_dir = f"../dataset/images/{shapes[0]}"

        # NOTE(review): the body of the original existence check was lost;
        # creating the directory is the presumed intent - confirm.
        os.makedirs(self.save_dir, exist_ok=True)

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the sample for row ``idx``.

        In train mode the result is a dict with the keys ``'id'``,
        ``'damage'``, ``'growth_stage'``, ``'extent'`` and
        ``'image_<size>'``, where ``<size>`` is ``self.shapes[0]``.
        """
        row = self.df.iloc[idx]
        save_dir_path = f"{self.save_dir}/{row['ID']}.JPG"

        if self.mode == 'train':
            return {
                'id': row['ID'],
                'damage': row['damage'],
                'growth_stage': row['growth_stage'],
                'extent': row['extent'],
                # [3:] drops the leading "../" of the save directory.
                f'image_{self.shapes[0]}': save_dir_path[3:],
            }

        # NOTE(review): the test-mode return value is not visible in the
        # original snippet; kept as None until the intended schema is known.
        return None

trainset = DroughtDamageDataset('../dataset/Train.csv', shapes=(384, 384))

dataloader = DataLoader(