I am trying to fine-tune the model. The model is training / learning the QA tasks and I am getting better answers, but the validation loss is increasing.
Here is my training loop:
# Fine-tuning loop: 3 epochs, each with a training phase over `batches`
# followed by a validation phase over `validloader`.
# NOTE(review): assumes `model`, `optimizer`, `batches` (presumably built
# from `trainloader`), `trainloader`, `validloader`, `tqdm`, and
# `min_valid_loss` (initialised to a large value, e.g. float('inf')) are
# all defined earlier in the file -- confirm.
for e in range(3):
    # ---------------- training phase ----------------
    model.train()
    train_loss = 0.0
    counter = 0
    losses = []  # running-average loss per batch (train, then valid)
    tbar = tqdm(batches)
    for index, batch in enumerate(tbar):
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        # Transfer data to GPU if available
        if torch.cuda.is_available():
            input_ids = input_ids.cuda()
            attention_mask = attention_mask.cuda()
            labels = labels.cuda()
        # Forward pass; supplying `labels` makes the model compute the
        # loss internally (HuggingFace-style output object)
        output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels)
        loss = output.loss  # logits were bound but never used -- dropped
        # Backward pass and weight update
        loss.backward()
        optimizer.step()
        # BUG FIX: the original accumulated `loss.item() * 8` (magic
        # constant) and divided the 8 back out in the progress bar but
        # NOT in the epoch summary print, inflating the reported epoch
        # training loss 8x. Accumulate the plain loss instead.
        train_loss += loss.item()
        counter += 1
        losses.append(train_loss / counter)
        tbar.set_postfix({'loss': train_loss / counter})

    # ---------------- validation phase ----------------
    model.eval()  # disable dropout / use eval statistics in norm layers
    valid_loss = 0.0
    counter = 0
    vbar = tqdm(validloader)
    with torch.no_grad():  # no gradient tracking needed for evaluation
        for index, batch in enumerate(vbar):
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']
            # Transfer data to GPU if available
            if torch.cuda.is_available():
                input_ids = input_ids.cuda()
                attention_mask = attention_mask.cuda()
                labels = labels.cuda()
            output = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels)
            valid_loss += output.loss.item()
            counter += 1
            # BUG FIX: the original appended the *cumulative* validation
            # loss here; append the running average instead, consistent
            # with the training phase above.
            losses.append(valid_loss / counter)
            vbar.set_postfix({'loss': valid_loss / counter})

    # Checkpoint the model after every epoch
    filename = './saved_model' + str(e) + '.pt'
    torch.save(model.state_dict(), filename)
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainloader)} \t\t Validation Loss: {valid_loss / len(validloader)}')
    # Keep a separate copy of the best model (lowest validation loss)
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(model.state_dict(), './saved_model.pth')
Can someone please look into it?
Training loss is decreasing but validation loss is increasing.
Also, the training set has 65K examples and the validation set has 27K examples.