Hello guys,
I'm getting a memory leak with the code below (both on CPU and GPU):
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
import gc
import numpy as np
from memory_profiler import profile


class KVEmbedding:
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super(KVEmbedding, cls).__new__(cls)
        return cls._instance

    def __init__(self, device='cpu'):
        if not hasattr(self, 'initialized'):  # To prevent reinitialization
            self.device = device
            # Load tokenizer and model from pretrained multilingual-e5-small
            self.tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-small")
            self.model = AutoModel.from_pretrained("intfloat/multilingual-e5-small").to(self.device)
            self.initialized = True  # Mark as initialized

    def average_pool(self, last_hidden_states, attention_mask):
        # Apply mask to hidden states, set masked positions to 0
        last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
        # Average the hidden states along the sequence dimension
        return last_hidden.sum(dim=1) / attention_mask.sum(dim=1, keepdim=True)

    @profile
    def embedding(self, l_transcription, batch_size=32):
        batch_dict = self.tokenizer(
            l_transcription,
            max_length=512,
            padding=True,
            truncation=True,
            return_tensors="pt",
        ).to(self.device)
        input_ids, attention_mask = batch_dict["input_ids"], batch_dict["attention_mask"]
        del batch_dict
        gc.collect()

        num_batches = (len(input_ids) + batch_size - 1) // batch_size
        embeddings_list = []
        for i in range(num_batches):
            start, end = i * batch_size, min((i + 1) * batch_size, len(input_ids))
            batch_input_ids, batch_attention_mask = input_ids[start:end], attention_mask[start:end]

            outputs = self.model(input_ids=batch_input_ids, attention_mask=batch_attention_mask)
            embeddings = self.average_pool(outputs.last_hidden_state, batch_attention_mask)
            embeddings = abs(F.normalize(embeddings, p=2, dim=1)).detach().cpu().numpy()
            embeddings_list.append(embeddings)

            # Clear GPU memory if using CUDA
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            del batch_input_ids, batch_attention_mask, outputs, embeddings, start, end
            gc.collect()

        all_embeddings = np.concatenate(embeddings_list, axis=0)
        del embeddings_list, num_batches
        gc.collect()

        return all_embeddings
def main():
    kvembedding = KVEmbedding(device="cpu")
    i = 0
    while 1:
        output = kvembedding.embedding(["transcriptions"] * 150)
        i += 1
        print(i)
        del output
        gc.collect()


if __name__ == "__main__":
    main()
Python: 3.8.11
PyTorch: 1.7.1
Transformers: 4.30.1
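
One thing I'm not sure about: the forward pass runs with autograd enabled and I only call .detach() afterwards. Would wrapping the model call in torch.no_grad() (and putting the model in eval() mode) make a difference here? A minimal standalone sketch of what I mean, with roughly the same pooling as above (just an illustration, not a confirmed fix):

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-small")
model = AutoModel.from_pretrained("intfloat/multilingual-e5-small")
model.eval()  # disable dropout for inference

texts = ["transcriptions"] * 150
batch = tokenizer(texts, max_length=512, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():  # no autograd graph is built or kept around
    outputs = model(**batch)
    mask = batch["attention_mask"]
    # Mask padding positions, then average over the sequence dimension
    hidden = outputs.last_hidden_state.masked_fill(~mask[..., None].bool(), 0.0)
    embeddings = hidden.sum(dim=1) / mask.sum(dim=1, keepdim=True)
    embeddings = F.normalize(embeddings, p=2, dim=1).cpu().numpy()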
Any idea how to fix this problem?