About training and evaluation methods

I have a question. When I train a model, I use two methods to evaluate it. The first method is to evaluate the model at the end of every epoch, like this:

import os

import torch
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

def train_one_epoch(args, model, optimizer, scheduler, train_dataloader):
    """ Train the model """
    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataloader)*args.train_batch_size)

    epoch_step = 0
    epoch_loss = 0.0
    model.zero_grad()

    # The batch reading below needs to be adapted to your own data script
    epoch_iterator = tqdm(train_dataloader, desc="Training")
    # model.train()
    scaler = GradScaler()

    # Adversarial training code (optional)
    # fgm = FGM(model, epsilon=1, emb_name='word_embeddings.weight')
    # pgd = PGD(model, emb_name='word_embeddings.weight', epsilon=1.0, alpha=0.3)
    # k=3

    for step, batch in enumerate(epoch_iterator):
        model.train()
        batch = tuple(t.to(args.device) for t in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1],
                  'token_type_ids': batch[2],
                  'start_positions': batch[3],
                  'end_positions': batch[4],
                  'answerable_label': batch[5]}


        if args.model_type in ["xlm", "roberta", "distilbert", "camembert", "bart", "longformer"]:
                del inputs["token_type_ids"]

        if args.model_type in ['xlnet', 'xlm']:
                inputs.update({'cls_index': batch[6],
                               'p_mask':       batch[9]})
        with autocast():
            outputs = model(**inputs)
            loss = outputs[0]

            # if args.n_gpu > 1:
            #     loss = loss.mean()  # mean() to average on multi-gpu parallel training

        epoch_loss += loss.item()
        scaler.scale(loss).backward()

        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # Update learning rate schedule
        optimizer.zero_grad()
        epoch_step += 1
    return epoch_loss / epoch_step

for epoch in range(int(args.num_train_epochs)):
        logger.info('***** Epoch {} Running Start! *****'.format(epoch+1))
        train_epoch_loss = train_one_epoch(args, model, optimizer, scheduler, train_dataloader)
        val_results = val_one_epoch(args, model, tokenizer, val_dataloader)  # evaluate at the end of every epoch

The other method is like this:

def train_and_evaluate(args, model, tokenizer, optimizer, scheduler, train_dataloader, val_loader, epoch, max_f1):
    """ Train the model """
    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataloader)*args.train_batch_size)

    epoch_step = 0
    epoch_loss = 0.0
    model.zero_grad()

    
    epoch_iterator = tqdm(train_dataloader, desc="Training")
    # model.train()
    scaler = GradScaler()

    for step, batch in enumerate(epoch_iterator):
        model.train()
        batch = tuple(t.to(args.device) for t in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1],
                  'token_type_ids': batch[2],
                  'start_positions': batch[3],
                  'end_positions': batch[4],
                  'answerable_label': batch[5]}


        if args.model_type in ["xlm", "roberta", "distilbert", "camembert", "bart", "longformer"]:
                del inputs["token_type_ids"]

        if args.model_type in ['xlnet', 'xlm']:
                inputs.update({'cls_index': batch[6],
                               'p_mask':       batch[9]})
        with autocast():
            outputs = model(**inputs)
            loss = outputs[0]

            # if args.n_gpu > 1:
            #     loss = loss.mean()  # mean() to average on multi-gpu parallel training

        epoch_loss += loss.item()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # Update learning rate schedule
        optimizer.zero_grad()
        epoch_step += 1

        # evaluate the model every args.evaluate_steps steps
        if epoch_step % args.evaluate_steps == 0:
            val_results = evaluate(args, model, tokenizer, val_loader)  # run validation
            if max_f1 < val_results.get('f1'):
                max_f1 = val_results.get('f1')

                # logger.info('Epoch {} Training loss is {:.4f}'.format(epoch+1, epoch_loss/epoch_step))
                logger.info("***** Eval results %s *****", "")
                info = "-".join([f' {key}: {value:.4f} ' for key, value in val_results.items()])
                logger.info(info)

                # Save best model checkpoint
                output_dir = os.path.join(args.output_dir, args.model_type)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                # Save weights of the network
                model_to_save = model.module if hasattr(model, "module") else model  # Take care of distributed/parallel training
                # model_checkpoint = {'epoch': epoch + 1,
                #             'state_dict': model_to_save.state_dict(),
                #             'optim_state_dict': optimizer.state_dict(),
                #             'scheduler_dict': scheduler.state_dict(),
                #             }
                # model_to_save.save_pretrained(output_dir)
                tokenizer.save_pretrained(output_dir)
                model_file_path = os.path.join(output_dir, 'qa-best.bin')
                torch.save(model_to_save.state_dict(), model_file_path)
                logger.info("Saving best model checkpoint to %s", output_dir)

    # if 'cuda' in str(args.device):
    # torch.cuda.empty_cache()

    return max_f1

for epoch in range(int(args.num_train_epochs)):
        # seed_everything(args.seed)
        logger.info('******************** Epoch {} Running Start! ********************'.format(epoch+1))
        max_f1 = train_and_evaluate(args, model, tokenizer, optimizer, scheduler,
                                    train_dataloader, val_dataloader, epoch, max_f1)
        last_evaluate_results = evaluate(args, model, tokenizer, val_dataloader)  # evaluate at the end of every epoch

So I find that these two methods do not give the same evaluation results at the end of every epoch. If I evaluate every few steps during training, the evaluation result at the end of the epoch is not the same as when I only evaluate at the end of every epoch.
Can anyone help me? Thanks!

As long as you are properly switching between model.train() (during training) and model.eval() (during evaluation), the validation loop should not have a major influence on the training.
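
For reference, a minimal sketch of that switching pattern (the batch layout and the model signature here are assumptions, not taken from the code above):

def run_validation_sketch(args, model, val_dataloader):
    model.eval()  # disable dropout, use running stats in batch norm
    total_loss, steps = 0.0, 0
    with torch.no_grad():  # no gradients are needed for validation
        for batch in val_dataloader:
            batch = tuple(t.to(args.device) for t in batch)
            outputs = model(input_ids=batch[0], attention_mask=batch[1])
            total_loss += outputs[0].item()
            steps += 1
    model.train()  # switch back before resuming training
    return total_loss / steps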

Note that you won’t be able to expect bitwise accurate results between both approaches (even if you are using only deterministic algorithms), since the validation loop could potentially call into the random number generator. This is usually uninteresting, but just for the sake of completeness I’m mentioning it here.
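
If you want to rule that out, one option (a sketch wrapped around your own evaluate function, not something from your code) is to snapshot the RNG state before validation and restore it afterwards, so the evaluation cannot shift the randomness of later training steps:

def evaluate_rng_safe(args, model, tokenizer, val_dataloader):
    # save the CPU and CUDA RNG states before validation
    cpu_state = torch.get_rng_state()
    cuda_states = torch.cuda.get_rng_state_all() if torch.cuda.is_available() else None

    results = evaluate(args, model, tokenizer, val_dataloader)

    # restore the states so training randomness is unaffected
    torch.set_rng_state(cpu_state)
    if cuda_states is not None:
        torch.cuda.set_rng_state_all(cuda_states)
    return results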

@ptrblck Hi, thanks for your reply. But I found an interesting situation: with the two methods, the results in the first epoch are the same, but in the next epoch they are not. It is interesting :joy:
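
That observation fits the RNG explanation above: both runs start from the same seed, so the first epoch consumes identical random numbers; once the extra evaluation draws from the generator (e.g. via a shuffled validation DataLoader), everything after it is shifted. A toy demonstration of the mechanism (the rand calls just stand in for the randomness each phase consumes):

import torch

torch.manual_seed(0)
a1 = torch.rand(3)   # randomness consumed by epoch 1 training
b1 = torch.rand(3)   # randomness consumed by epoch 2 training

torch.manual_seed(0)
a2 = torch.rand(3)   # epoch 1: identical draws
_ = torch.rand(1)    # an evaluation that happens to call into the RNG
b2 = torch.rand(3)   # epoch 2: draws are now shifted

print(torch.equal(a1, a2))  # True  -> first epoch matches
print(torch.equal(b1, b2))  # False -> later epochs diverge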