I ran into this error while training and tried reducing the batch_size, but it still occurs:
    labels=decoder_input_ids
RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 5.80 GiB total capacity; 4.62 GiB already allocated; 2.56 MiB free; 4.72 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
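From what I understand, the max_split_size_mb hint in the message refers to the PYTORCH_CUDA_ALLOC_CONF environment variable, which would have to be set before CUDA is first used, something like this (128 is just an example value I picked, not a recommendation):

    import os

    # Must be set before the first CUDA allocation happens;
    # 128 MiB here is only an illustrative split-size cap.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

I am not sure whether fragmentation is actually my problem, though.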
The code is:
import torch
from torch.utils.data import DataLoader

# dataset, model, optimizer, and device are defined earlier
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in dataloader:
        # Move the batch tensors to the GPU
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        decoder_input_ids = batch['decoder_input_ids'].to(device)
        decoder_attention_mask = batch['decoder_attention_mask'].to(device)

        # Forward pass; the model computes the loss from labels
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            labels=decoder_input_ids
        )
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
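To check whether memory usage is actually climbing across batches, I was going to log the allocator counters inside the loop. torch.cuda.memory_allocated and torch.cuda.memory_reserved are standard PyTorch calls; placing this right after optimizer.step() is just my guess at a useful spot:

    # Inside the batch loop, right after optimizer.step():
    allocated_mib = torch.cuda.memory_allocated(device) / 1024**2  # MiB currently used by live tensors
    reserved_mib = torch.cuda.memory_reserved(device) / 1024**2    # MiB held by the caching allocator
    print(f"allocated: {allocated_mib:.0f} MiB, reserved: {reserved_mib:.0f} MiB")

Is the OOM here expected on a 6 GiB GPU, or is something in my loop holding on to memory it shouldn't?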