I have a question. When I train a model, I use two methods to evaluate it. The first method evaluates the model at the end of every epoch, like this:
def train_one_epoch(args, model, optimizer, scheduler, train_dataloader):
    """Train the model for a single epoch using mixed precision (AMP).

    Args:
        args: namespace with at least `device`, `model_type`, and
            `train_batch_size`.
        model: the QA model; expected to return the loss as `outputs[0]`.
        optimizer: optimizer, stepped through a `GradScaler` once per batch.
        scheduler: learning-rate scheduler, stepped once per batch.
        train_dataloader: yields tuples of tensors; see the `inputs`
            mapping below for the expected positions.

    Returns:
        Mean training loss over the epoch (0.0 if the dataloader is empty).
    """
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataloader) * args.train_batch_size)
    epoch_step = 0
    epoch_loss = 0.0
    model.zero_grad()
    # NOTE: adapt the batch unpacking below to your own data script.
    epoch_iterator = tqdm(train_dataloader, desc="Training")
    scaler = GradScaler()
    for step, batch in enumerate(epoch_iterator):
        model.train()
        batch = tuple(t.to(args.device) for t in batch)
        inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'token_type_ids': batch[2],
                  'start_positions': batch[3],
                  'end_positions': batch[4],
                  'answerable_label': batch[5]}
        # These model families do not use token_type_ids.
        if args.model_type in ["xlm", "roberta", "distilbert", "camembert", "bart", "longformer"]:
            del inputs["token_type_ids"]
        if args.model_type in ['xlnet', 'xlm']:
            inputs.update({'cls_index': batch[6],
                           'p_mask': batch[9]})
        # Forward pass under autocast; the loss is the first model output.
        with autocast():
            outputs = model(**inputs)
            loss = outputs[0]
        epoch_loss += loss.item()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # update learning-rate schedule once per batch
        optimizer.zero_grad()
        epoch_step += 1
    # Guard against ZeroDivisionError when the dataloader is empty.
    return epoch_loss / epoch_step if epoch_step else 0.0
# Method 1: train one epoch, then evaluate once at the end of the epoch.
# (Stray markdown `**...**` markers removed — they made this a syntax error.)
for epoch in range(int(args.num_train_epochs)):
    logger.info('***** Epoch {} Running Start! *****'.format(epoch + 1))
    train_epoch_loss = train_one_epoch(args, model, optimizer, scheduler, train_dataloader)
    val_results = val_one_epoch(args, model, tokenizer, val_dataloader)
The other method is like this:
def train_and_evaluate(args, model, tokenizer, optimizer, scheduler, train_dataloader, val_loader, epoch, max_f1):
    """Train for one epoch, evaluating every `args.evaluate_steps` batches.

    Whenever the in-training validation F1 improves on `max_f1`, the model
    weights and tokenizer are saved to
    `args.output_dir/args.model_type/qa-best.bin`.

    Args:
        args: namespace with `device`, `model_type`, `train_batch_size`,
            `evaluate_steps`, and `output_dir`.
        model: the QA model; expected to return the loss as `outputs[0]`.
        tokenizer: tokenizer, saved alongside the best checkpoint.
        optimizer: optimizer, stepped through a `GradScaler` once per batch.
        scheduler: learning-rate scheduler, stepped once per batch.
        train_dataloader: yields tuples of tensors (see `inputs` below).
        val_loader: validation dataloader passed to `evaluate`.
        epoch: current epoch index (0-based), used for logging only.
        max_f1: best validation F1 seen before this epoch.

    Returns:
        The best validation F1 observed so far (max of `max_f1` and all
        F1 values measured during this epoch).
    """
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataloader) * args.train_batch_size)
    epoch_step = 0
    epoch_loss = 0.0
    model.zero_grad()
    epoch_iterator = tqdm(train_dataloader, desc="Training")
    scaler = GradScaler()
    for step, batch in enumerate(epoch_iterator):
        model.train()
        batch = tuple(t.to(args.device) for t in batch)
        inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'token_type_ids': batch[2],
                  'start_positions': batch[3],
                  'end_positions': batch[4],
                  'answerable_label': batch[5]}
        # These model families do not use token_type_ids.
        if args.model_type in ["xlm", "roberta", "distilbert", "camembert", "bart", "longformer"]:
            del inputs["token_type_ids"]
        if args.model_type in ['xlnet', 'xlm']:
            inputs.update({'cls_index': batch[6],
                           'p_mask': batch[9]})
        with autocast():
            outputs = model(**inputs)
            loss = outputs[0]
        epoch_loss += loss.item()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # update learning-rate schedule once per batch
        optimizer.zero_grad()
        epoch_step += 1
        # Evaluate every `evaluate_steps` optimization steps.
        if epoch_step % args.evaluate_steps == 0:
            # BUG FIX: `val_results` was read below without ever being
            # assigned (NameError). Run the evaluation here first.
            val_results = evaluate(args, model, tokenizer, val_loader)
            if max_f1 < val_results.get('f1'):
                max_f1 = val_results.get('f1')
                logger.info("***** Eval results %s *****", "")
                info = "-".join([f' {key}: {value:.4f} ' for key, value in val_results.items()])
                logger.info(info)
                # Save the best checkpoint seen so far.
                output_dir = os.path.join(args.output_dir, args.model_type)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                # Unwrap DistributedDataParallel / DataParallel if needed.
                model_to_save = model.module if hasattr(model, "module") else model
                tokenizer.save_pretrained(output_dir)
                model_file_path = os.path.join(output_dir, 'qa-best.bin')
                torch.save(model_to_save.state_dict(), model_file_path)
                logger.info("Saving best model checkpoint to %s", output_dir)
    return max_f1
# Method 2: train with periodic in-epoch evaluation, then run a final
# evaluation at the end of each epoch.
# (Stray markdown `**...**` markers removed — they made this a syntax error.)
for epoch in range(int(args.num_train_epochs)):
    logger.info('******************** Epoch {} Running Start! ********************'.format(epoch + 1))
    max_f1 = train_and_evaluate(args, model, tokenizer, optimizer, scheduler, train_dataloader,
                                val_dataloader, epoch, max_f1)
    last_evaluate_results = evaluate(args, model, tokenizer, val_dataloader)
So I find that the two methods do not produce the same evaluation result at the end of an epoch: if I also evaluate every few steps during training, the end-of-epoch evaluation result differs from what I get when I only evaluate at the end of each epoch.
Can anyone help me? Thanks!