I ran into a situation where RAM usage gradually increases when `num_workers`
is set too large. I would like to know why this happens; the code I use for the dataset and the data collator is shown below:
class DiffusionDataset(Dataset):
    """Map-style dataset yielding ``(transformed_image, prompt)`` pairs.

    Args:
        df: DataFrame with at least ``'filepath'`` and ``'prompt'`` columns.
        transform: Callable applied to the opened PIL image.
    """

    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # Fix: the original never closed the file opened by Image.open,
        # leaking one file descriptor per sample; with many DataLoader
        # workers this accumulates. The context manager closes it as soon
        # as the transform has consumed the pixel data.
        with Image.open(row['filepath']) as img:
            image = self.transform(img)
        prompt = row['prompt']
        return image, prompt
class DiffusionCollator:
    """Collate function that stacks images and embeds prompts on CPU.

    NOTE(review): when this collator is passed to a DataLoader with
    ``num_workers > 0``, every worker process receives its own pickled copy,
    so each worker holds a separate SentenceTransformer in memory — a likely
    contributor to RAM growth as ``num_workers`` increases; confirm by
    profiling worker RSS.
    """

    def __init__(self):
        self.st_model = SentenceTransformer(
            '/kaggle/input/sentence-transformers-222/all-MiniLM-L6-v2',
            device='cpu'
        )

    def __call__(self, batch):
        # Split the list of (image, prompt) samples into parallel lists.
        images = [sample[0] for sample in batch]
        prompts = [sample[1] for sample in batch]

        stacked = torch.stack(images)
        embeddings = self.st_model.encode(
            prompts,
            show_progress_bar=False,
            convert_to_tensor=True
        )
        return stacked, embeddings