Memory leak with a simple piece of code

Hello guys,
I'm getting a memory leak with the code below (both on CPU and GPU):

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
import gc
import numpy as np
from memory_profiler import profile

class KVEmbedding:
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Singleton: create the instance only once
        if cls._instance is None:
            cls._instance = super(KVEmbedding, cls).__new__(cls)
        return cls._instance

    def __init__(self, device='cpu'):
        if not hasattr(self, 'initialized'):  # To prevent reinitialization
            self.device = device
            # Load tokenizer and model from pretrained multilingual-e5-small
            self.tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-small")
            self.model = AutoModel.from_pretrained("intfloat/multilingual-e5-small").to(self.device)
            self.initialized = True  # Mark as initialized

    def average_pool(self, last_hidden_states, attention_mask):
        # Apply mask to hidden states, set masked positions to 0
        last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
        # Average the hidden states along the sequence dimension
        return last_hidden.sum(dim=1) / attention_mask.sum(dim=1, keepdim=True)

    @profile
    def embedding(self, l_transcription, batch_size=32):
        batch_dict = self.tokenizer(
            l_transcription,
            max_length=512,
            padding=True,
            truncation=True,
            return_tensors="pt",
        ).to(self.device)

        input_ids, attention_mask = batch_dict["input_ids"], batch_dict["attention_mask"]
        del batch_dict
        gc.collect()
        num_batches = (len(input_ids) + batch_size - 1) // batch_size
        embeddings_list = []

        for i in range(num_batches):
            start, end = i * batch_size, min((i + 1) * batch_size, len(input_ids))
            batch_input_ids, batch_attention_mask = input_ids[start:end], attention_mask[start:end]
            outputs = self.model(input_ids=batch_input_ids, attention_mask=batch_attention_mask)
            embeddings = self.average_pool(outputs.last_hidden_state, batch_attention_mask)
            embeddings = abs(F.normalize(embeddings, p=2, dim=1)).detach().cpu().numpy()
            embeddings_list.append(embeddings)
            # Clear GPU memory if using CUDA
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            del batch_input_ids, batch_attention_mask, outputs, embeddings, start, end
            gc.collect()

        all_embeddings = np.concatenate(embeddings_list, axis=0)
        del embeddings_list, num_batches
        gc.collect()
        return all_embeddings

def main():
    kvembedding = KVEmbedding(device='cpu')
    i = 0
    while True:
        output = kvembedding.embedding(["transcriptions"] * 150)
        i += 1
        print(i)
        del output
        gc.collect()

if __name__ == "__main__":
    main()

Python: 3.8.11
PyTorch: 1.7.1
Transformers: 4.30.1

Any ideas on how to fix this problem?
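
One thing I am planning to try (just a sketch on my side, not yet verified to fix this particular leak): wrapping the forward pass in torch.no_grad(), so no autograd graph is built or kept alive between iterations. Stripped down to a self-contained example, it would look like this:

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-small")
model = AutoModel.from_pretrained("intfloat/multilingual-e5-small").eval()

batch = tokenizer(
    ["transcriptions"] * 4,
    max_length=512,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

with torch.no_grad():  # no autograd graph is recorded for the forward pass
    outputs = model(**batch)
    mask = batch["attention_mask"]
    # Same average pooling as above: zero out padded positions, then mean
    hidden = outputs.last_hidden_state.masked_fill(~mask[..., None].bool(), 0.0)
    embeddings = hidden.sum(dim=1) / mask.sum(dim=1, keepdim=True)
    embeddings = F.normalize(embeddings, p=2, dim=1).cpu().numpy()

model.eval() is there to disable dropout; it should not affect memory, but it makes the embeddings deterministic. I am staying with torch.no_grad() rather than torch.inference_mode(), since the latter was only added in PyTorch 1.9.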

I have the same issue.

Hello. By any chance, have you found a solution to this? I'm facing a similar problem.

Hello! Have you come across a solution for this issue? I'm experiencing a similar problem.