I have a very basic training loop that gets much slower over time. Based on advice I found online, I added torch.cuda.empty_cache(),
but that did not fix it. Any suggestions?
# Select GPU when available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Device used for training: {device}')

# Single-logit head (num_labels=1): the model computes a regression-style loss
# internally when `labels` is passed.
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=1)
model = torch.nn.DataParallel(model)
model.to(device)
model.train()

optim = AdamW(model.parameters(), lr=5e-5)

batch_iter = 0
training_loss = 0.0
num_batches = len(train_dataloader)
# Guard against small dataloaders: round(num_batches / 100) is 0 when
# num_batches < 50, and `batch_iter % 0` would raise ZeroDivisionError.
log_interval = max(1, round(num_batches / 100))
sns.set_style('darkgrid')

for epoch in range(3):
    for batch in tqdm(train_dataloader):
        batch_iter += 1
        # set_to_none=True frees gradient tensors instead of zero-filling
        # them — cheaper per step and lighter on memory.
        optim.zero_grad(set_to_none=True)

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # NOTE(review): torch.logit expects values strictly inside (0, 1);
        # labels of exactly 0 or 1 map to -inf/+inf and will poison the loss.
        # Confirm the label range, or use torch.logit(x, eps=1e-6).
        labels = torch.logit(batch['label']).to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask,
                        labels=labels)
        # DataParallel returns one loss per replica; reduce to a scalar.
        loss = outputs[0].mean()

        # .item() detaches the scalar, so no autograd graph is retained across
        # iterations (retaining one is the classic cause of a loop that keeps
        # getting slower). Normalize by the same interval used for logging.
        training_loss += loss.item() / log_interval
        if batch_iter % log_interval == 0:
            writer.add_scalar('Loss/train', training_loss, batch_iter)
            training_loss = 0.0

        loss.backward()
        optim.step()
        # Do NOT call torch.cuda.empty_cache() inside the loop: it forces a
        # device synchronization and discards the caching allocator's blocks,
        # so every subsequent step pays re-allocation cost. It does not free
        # any memory the allocator would not reuse anyway — this is the most
        # likely cause of the progressive slowdown.

    # Unwrap DataParallel before saving so checkpoint keys are not prefixed
    # with "module.".
    model.module.save_pretrained(f"checkpoints/model_epoch_{epoch}")