I am trying to add cross-validation to my model, but I'm confused about how to do it, because from what I've seen, after creating the fold loop the dataloaders are created from inside that loop.
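The pattern I keep seeing looks roughly like this (just my rough sketch of those examples; the toy dataset, batch size, and fold count are placeholders, and KFold is from scikit-learn):

import torch
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset
from sklearn.model_selection import KFold

# toy stand-in for the real dataset, just to make the pattern concrete
dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))

kfold = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
    # a fresh pair of loaders is built inside the fold loop, one pair per fold
    train_loader = DataLoader(dataset, batch_size=16,
                              sampler=SubsetRandomSampler(train_idx))
    val_loader = DataLoader(dataset, batch_size=16,
                            sampler=SubsetRandomSampler(val_idx))
    print(f'fold {fold}: {len(train_idx)} train / {len(val_idx)} val samples')
    # ... train and validate on this fold ...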
In my case I have two functions:
def get_val_utils(opt):
    # data augmentation
    # ........
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn,
                                             collate_fn=collate_fn)
    return val_loader, val_logger
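(I'm guessing that for cross-validation the val_sampler here would have to be built from the current fold's validation indices, something like this, where val_indices is a new argument I would add?)

# hypothetical change: accept the current fold's indices and sample only those
def get_val_utils(opt, val_indices):
    # ........
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    # the DataLoader call stays the same; shuffle must stay False, because
    # passing a sampler together with shuffle=True raises an error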
and another function for the train set:
def get_train_utils(opt, parameters):
    # data augmentation
    # ...........
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)
    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
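(and similarly on the train side, I assume, again with train_indices as a new argument I would add:)

# hypothetical change for the train side
def get_train_utils(opt, parameters, train_indices):
    # ...........
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    # shuffle=(train_sampler is None) then evaluates to False, which is
    # required, since an explicit sampler cannot be combined with shuffle=True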
and another function for the training loop
def main_worker(index, opt):
    # -------other code------
    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    prev_val_loss = None
    # --------------------------------
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    # --------------------------------
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_loss, train_accuracy = train_epoch(
                i, train_loader, model, criterion, optimizer,
                opt.device, current_lr, train_logger,
                train_batch_logger, tb_writer, opt.distributed)
            # -----------------------------
            train_losses.append(train_loss)
            train_accuracies.append(train_accuracy)
            # -----------------------------
            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)
        if not opt.no_val:
            prev_val_loss, prev_val_accuracy = val_epoch(
                i, val_loader, model, criterion,
                opt.device, val_logger, tb_writer,
                opt.distributed)
            # ---------------------------------
            val_losses.append(prev_val_loss)
            val_accuracies.append(prev_val_accuracy)
            # ---------------------------------
        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
The val_loader and train_loader variables are returned by get_val_utils and get_train_utils, but I am stuck on how to write the fold loop, how to set up the fold indices, and how to use val_loader and train_loader inside it. Can someone explain this to me?
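Is something like the following what I should be doing inside main_worker? (My own rough attempt; opt.n_folds is a new option I would add, dataset stands for the full data before any train/val split, generate_model stands for whatever builds the model, and get_train_utils/get_val_utils are assumed to accept the fold indices as sketched above.)

from sklearn.model_selection import KFold

# replaces the single train/val setup in main_worker
kfold = KFold(n_splits=opt.n_folds, shuffle=True, random_state=opt.manual_seed)
fold_val_losses = []
for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
    model = generate_model(opt)  # fresh weights per fold so folds stay independent
    (train_loader, train_sampler, train_logger, train_batch_logger,
     optimizer, scheduler) = get_train_utils(opt, parameters, train_idx)
    val_loader, val_logger = get_val_utils(opt, val_idx)

    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        train_loss, train_accuracy = train_epoch(
            i, train_loader, model, criterion, optimizer, opt.device,
            get_lr(optimizer), train_logger, train_batch_logger,
            tb_writer, opt.distributed)
        prev_val_loss, prev_val_accuracy = val_epoch(
            i, val_loader, model, criterion, opt.device, val_logger,
            tb_writer, opt.distributed)
    fold_val_losses.append(prev_val_loss)

# the cross-validation score is then the average over folds
print(sum(fold_val_losses) / len(fold_val_losses))

Is that the right structure, or am I misunderstanding where the fold loop goes?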