Loading Big Data using DataLoader

I'm using DataLoader to load a big dataset (16 GB), but I run out of memory. Does anyone have a suggestion for how to use DataLoader when the dataset doesn't fit in memory?

Here’s my code:


import jsonlines
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pytorch_lightning as pl


class StockDataset(Dataset):
    def __init__(self, file_path):
        # Eagerly reads the whole JSONL file into a Python list,
        # so the full 16 GB ends up in RAM before training starts.
        self.data = []
        with jsonlines.open(file_path, 'r') as reader:
            for item in tqdm(reader, total=2564721, desc="Loading Data"):
                self.data.append(item)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        input_ids = torch.tensor([item['Close'] for item in sample['sample']])
        label = torch.tensor(sample['label'])
        return input_ids, label

class StockDataModule(pl.LightningDataModule):
    def __init__(self, train_file, val_file, test_file, batch_size=32):
        super().__init__()
        self.train_file = train_file
        self.val_file = val_file
        self.test_file = test_file
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_dataset = StockDataset(self.train_file)
        self.val_dataset = StockDataset(self.val_file)
        self.test_dataset = StockDataset(self.test_file)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)
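One way to avoid holding the whole file in RAM is to make the Dataset lazy: scan the JSONL file once to record the byte offset of each line, then seek to and parse a single line inside __getitem__. Only the offset list stays in memory, and shuffle=True and multiple workers keep working because the dataset is still map-style. The sketch below is untested against this data; LazyStockDataset is a hypothetical name, and the 'sample', 'Close', and 'label' keys are taken from the code above.

import json
import torch
from torch.utils.data import Dataset

class LazyStockDataset(Dataset):
    def __init__(self, file_path):
        self.file_path = file_path
        self.offsets = []
        # One pass over the file to record where each JSON line starts;
        # only these integer offsets are kept in memory.
        with open(file_path, 'rb') as f:
            offset = 0
            for line in f:
                self.offsets.append(offset)
                offset += len(line)

    def __len__(self):
        return len(self.offsets)

    def __getitem__(self, idx):
        # Seek to the idx-th line and parse just that record on demand.
        with open(self.file_path, 'rb') as f:
            f.seek(self.offsets[idx])
            sample = json.loads(f.readline())
        input_ids = torch.tensor([item['Close'] for item in sample['sample']])
        label = torch.tensor(sample['label'])
        return input_ids, label

With this, StockDataModule.setup can construct LazyStockDataset instead of StockDataset, and passing num_workers to the DataLoader lets several workers parse lines in parallel to hide the per-item disk read. If shuffling can be relaxed, an IterableDataset that streams the file line by line is another option, but note that DataLoader does not support shuffle=True for iterable-style datasets.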