Hi,
I’m unsure why my code crashes with `RuntimeError: Too many open files. Communication with the workers is no longer possible.` It works when `__getitem__` returns only the string fields `id`, `damage`, `growth_stage`, and the image path, but it fails as soon as I add `extent` (an integer). How can I resolve this, and why does it happen only with integers? Could this be a bug?
PyTorch 2.1.0.dev20230828+cu121
pandas 2.0.3
PIL 9.3.0
Update: this error does not occur with PyTorch 1.13.0.
Thanks in advance
import os
import random

import pandas as pd
import torch.multiprocessing
from PIL import Image
from torch.utils.data import Dataset, DataLoader
class DroughtDamageDataset(Dataset):
    """Dataset of per-image drought-damage records read from a CSV file.

    Each item is a dict holding metadata from one CSV row plus the path at
    which the resized copy of that image is expected. No image data is
    loaded by this class.

    Args:
        df_path: Path to the CSV file. When the path contains ``"train"``
            (case-insensitive) the dataset runs in train mode and reads
            images from ``../dataset/train``; otherwise it runs in test mode
            and reads from ``../dataset/test``.
        shapes: Target image size given as ``(side,)`` (expanded to a square)
            or as a two-element sequence, which is stored as given.

    Raises:
        ValueError: If ``shapes`` does not contain exactly one or two values.
    """

    def __init__(self, df_path, shapes=(384,)):
        # Validate the size first so a bad argument fails immediately instead
        # of surfacing later as a missing ``self.shapes`` attribute.
        if len(shapes) == 1:
            self.shapes = (shapes[0], shapes[0])
        elif len(shapes) == 2:
            self.shapes = shapes
        else:
            raise ValueError(
                f"shapes must contain one or two values, got {len(shapes)}"
            )

        self.df_path = df_path
        self.df = pd.read_csv(self.df_path)

        if 'train' in self.df_path.lower():
            self.mode = 'train'
            self.root_path = '../dataset/train'
        else:
            self.mode = 'test'
            self.root_path = '../dataset/test'

        self.save_dir = f"../dataset/images/{shapes[0]}"
        # exist_ok avoids the check-then-create race when several processes
        # build the dataset at the same time.
        os.makedirs(self.save_dir, exist_ok=True)

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the metadata dict for row ``idx``.

        In train mode the dict contains ``id``, ``damage``, ``growth_stage``,
        ``extent`` and ``image_<size>``. In test mode it contains only ``id``
        and ``image_<size>``, the fields derived from columns this method
        reads in both modes.
        """
        row = self.df.iloc[idx]

        # Location of the original image; only the disabled one-off resize
        # step below consumes it.
        image_path = os.path.join(self.root_path, row['filename'])
        # image = Image.open(image_path) \
        #     .convert('RGB') \
        #     .resize(self.shapes)
        save_dir_path = f"{self.save_dir}/{row['ID']}.JPG"
        # image.save(save_dir_path)

        image_key = f'image_{self.shapes[0]}'
        # Drop the first three characters, i.e. the "../" prefix that
        # __init__ puts at the start of save_dir.
        relative_path = save_dir_path[3:]

        if self.mode == 'train':
            return {
                'id': row['ID'],
                'damage': row['damage'],
                'growth_stage': row['growth_stage'],
                'extent': row['extent'],
                image_key: relative_path,
            }

        # Previously test mode fell through and returned None, which the
        # default DataLoader collate function cannot batch.
        # NOTE(review): assumes the test CSV has no label columns - confirm.
        return {
            'id': row['ID'],
            image_key: relative_path,
        }
# Why integers trigger "Too many open files" while strings do not: the
# default collate function turns numeric fields into tensors but leaves
# strings as plain Python lists. Tensors produced in worker processes are
# handed to the main process through shared memory, and the default
# "file_descriptor" sharing strategy keeps a file descriptor open for each
# shared tensor that is still alive. With batch_size=1 this can exhaust the
# per-process descriptor limit. The "file_system" strategy identifies shared
# memory by file name instead, so descriptors are not held open.
# Alternative: raise the limit in the shell with `ulimit -n`.
torch.multiprocessing.set_sharing_strategy('file_system')

trainset = DroughtDamageDataset('../dataset/Train.csv', shapes=(384, 384))
dataloader = DataLoader(
    trainset,
    shuffle=False,
    num_workers=4,
    batch_size=1,
)