Image loading time is too long during training

I found that loading a single sample is very slow during training. For example, I have created a Dataset subclass called RigidMotionDataset and a transform called ToTensor.

class RigidMotionDataset(Dataset):
    """Rigid Motion dataset.

    Each sample is a movie stored as an individual ``.npy`` file named
    ``X_<train_test>_<idx>.npy`` under ``root_dir``; ``__getitem__`` returns a
    randomly located (n_row, n_col) spatial crop of that movie together with
    its scalar target velocity.
    """

    def __init__(self, targets_file, root_dir, n_row=1, n_col=3, transform=None, train_test='train'):
        """
        Args:
            targets_file (string): Path to the target velocities stored
                as a 1d numpy array.
            root_dir (string): Directory with all the movies.
            n_row (int): # of rows randomly selected for the crop.
            n_col (int): # of columns randomly selected for the crop.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            train_test (string): File-name infix ('train' or 'test') used to
                locate the per-sample movie files.
        """
        self.targets = np.load(targets_file)
        self.root_dir = root_dir
        self.n_row = n_row
        self.n_col = n_col
        self.transform = transform
        self.train_test = train_test

    def __len__(self):
        # One movie file per target value.
        return len(self.targets)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        movie_name = os.path.join(self.root_dir,
                                  'X_'+self.train_test+f'_{idx}.npy')
        start_time = time.time()
        movie = np.load(movie_name)
        print(f'Loading time for one image is {time.time()-start_time}')
        # BUG FIX: np.random.randint's upper bound is exclusive, so the
        # original `randint(0, shape - n)` never sampled the last valid start
        # position, and raised ValueError (low >= high) whenever the movie
        # dimension exactly equals the crop size. `+ 1` makes every valid
        # start position reachable.
        row_start = np.random.randint(0, movie.shape[1] - self.n_row + 1)
        col_start = np.random.randint(0, movie.shape[2] - self.n_col + 1)
        target = self.targets[idx]
        target = np.array([target])
        sample = {'movie': movie[:,row_start:row_start+self.n_row,col_start:col_start+self.n_col], 'target': target}

        if self.transform:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    """Convert the ndarrays of a sample dict into float32 torch Tensors."""

    def __call__(self, sample):
        # Same conversion for both entries: wrap the ndarray and cast to float32.
        to_float_tensor = lambda arr: torch.from_numpy(arr).float()
        return {
            'movie': to_float_tensor(sample['movie']),
            'target': to_float_tensor(sample['target']),
        }

If I check the loading time of one sample by indexing the dataset directly, loading is relatively fast:

transformed_dataset = RigidMotionDataset(targets_file=targets_file,
                                         root_dir=root_dir,
                                         transform=ToTensor())

# Time the first four samples by indexing the dataset directly
# (indices 0-3, or fewer if the dataset is smaller).
for idx in range(min(4, len(transformed_dataset))):
    sample = transformed_dataset[idx]

Loading time for one image is 0.011719942092895508
Loading time for one image is 0.002935647964477539
Loading time for one image is 0.0025501251220703125
Loading time for one image is 0.0023491382598876953

However, during training, loading is more than 10 times slower:


FOLD 0

Reset trainable parameters of layer = Conv2d(30, 6, kernel_size=(1, 3), stride=(1, 1))
Reset trainable parameters of layer = Linear(in_features=6, out_features=100, bias=True)
Reset trainable parameters of layer = Linear(in_features=100, out_features=100, bias=True)
Reset trainable parameters of layer = Linear(in_features=100, out_features=100, bias=True)
Reset trainable parameters of layer = Linear(in_features=100, out_features=100, bias=True)
Reset trainable parameters of layer = Linear(in_features=100, out_features=100, bias=True)
Reset trainable parameters of layer = Linear(in_features=100, out_features=1, bias=True)
Starting epoch 1
Loading time for one image is 0.6959631443023682
Loading time for one image is 0.034269094467163086
Loading time for one image is 0.044671058654785156
Loading time for one image is 0.010963916778564453
Loading time for one image is 0.03287196159362793
Loading time for one image is 0.035894155502319336
Loading time for one image is 0.014185905456542969
Loading time for one image is 0.029668807983398438

My question is: is it normal that the loading time is much slower during training? Thanks.

The code for training:

# Configuration options
k_folds = 5
num_epochs = 1000
batch_size = 80
lr = 1e-3
loss_function = nn.MSELoss()

# Per-fold validation results
results = {}

# Set fixed random number seed for reproducibility
torch.manual_seed(42)

# Dataset
dataset = transformed_dataset

# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True)

# Start print
print('--------------------------------')

train_loss = []
valid_loss = []
# K-fold Cross Validation model evaluation
for fold, (train_ids, valid_ids) in enumerate(kfold.split(dataset)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_ids)

    # NOTE(review): with num_workers=1 a single worker process does all the
    # per-sample .npy reads (from an HDD here) while also competing with the
    # training process; this is the likely cause of the 10x slower per-sample
    # load observed during training. Raising num_workers (and pin_memory when
    # training on GPU) lets the loader prefetch batches in parallel — confirm
    # against the host's core count.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=batch_size, sampler=train_subsampler, num_workers=1)
    validloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=batch_size, sampler=valid_subsampler, num_workers=1)

    # Init the neural network with freshly reset weights for this fold
    network = LocalFeedforwardNet()
    network.apply(reset_weights)

    # Initialize optimizer
    optimizer = torch.optim.Adam(network.parameters(), lr=lr, weight_decay=0.01)

    # Per-mini-batch losses, accumulated across all epochs of this fold
    train_loss_tem = []

    # Run the training loop for the defined number of epochs
    for epoch in range(num_epochs):
        start_time = time.time()
        # Print epoch
        print(f'Starting epoch {epoch+1}')

        train_loss_epoch = 0.0
        n_batches = 0
        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader):
            # Get inputs
            inputs, targets = data['movie'], data['target']

            # Zero the gradients
            optimizer.zero_grad()

            # Perform forward pass
            outputs = network(inputs)

            # Compute loss
            loss = loss_function(outputs, targets)

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()

            # Record statistics
            train_loss_tem.append(loss.item())
            train_loss_epoch += loss.item()
            n_batches += 1
            if i % 40 == 0:
                print('Loss after mini-batch %5d: %.3f' %
                      (i, loss.item()))
        # BUG FIX: the original `train_loss_epoch / i+1` divided first and
        # THEN added 1 (operator precedence), so the reported epoch loss was
        # wrong. Divide by the actual batch count (guarding an empty loader).
        train_loss_epoch = train_loss_epoch / max(n_batches, 1)
        print(f'Loss for epoch {epoch+1} is {train_loss_epoch}')
        print(f'Time for epoch {epoch+1} is {time.time()-start_time}')
    train_loss.append(train_loss_tem)

    # Process is complete.
    print('Training process has finished. Saving trained model.')

    # Print about validing
    print('Starting validing')

    # Saving the model
    save_path = f'../results/local_feedforward/model-fold-{fold}.pth'
    torch.save(network.state_dict(), save_path)

    # Evaluation for this fold: average the loss over ALL validation batches.
    # (The original recorded only the LAST batch's loss, which is a noisy
    # estimate of validation performance.)
    valid_loss_sum = 0.0
    valid_batches = 0
    with torch.no_grad():

        # Iterate over the valid data and generate predictions
        for data in validloader:

            # Get inputs
            inputs, targets = data['movie'], data['target']

            # Generate outputs
            outputs = network(inputs)

            # Accumulate loss
            valid_loss_sum += loss_function(outputs, targets).item()
            valid_batches += 1

        fold_loss = valid_loss_sum / max(valid_batches, 1)
        # BUG FIX: '%d' truncated the float loss to an integer; use '%.3f'.
        print('Loss for fold %d: %.3f' % (fold, fold_loss))
        print('--------------------------------')
        results[fold] = fold_loss

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
print('--------------------------------')
# Renamed from `sum` to avoid shadowing the builtin.
total = 0.0
for key, value in results.items():
    print(f'Fold {key}: {value}')
    total += value
print(f'Average: {total/len(results)}')

Are you using a DataLoader torch.utils.data.dataloader — PyTorch 1.8.1 documentation to help with loading images? Usually that can provide a significant speedup because it allows for prefetching and parallelization across CPU cores.

Thanks for your reply! Yes, I am using torch.utils.data.DataLoader. But my question is that it takes much more time to load one image during training than just run my RigidMotionDataset.

I think that can depend on a few different factors; what is the average file size here and what kind of storage is the dataset on (HDD, SSD/NVMe, ?).

Each sample is a .npy file of about 346 KB. The data are stored on an HDD.