Multi-threaded usage of open_clip

Hi, I use open_clip with a pre-trained model on my FastAPI server to compare the similarity of images. After many concurrent HTTP requests, the server does not handle them in parallel; it just gets slower and slower. Please help me with this, thanks.

device = "cuda" if torch.cuda.is_available() else "cpu"
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16-plus-240', pretrained="laion400m_e32",device=device)

def imageEncoder(img):
    img1 = preprocess(img).unsqueeze(0).to(device)
    img1 = model.encode_image(img1)
    return img1


def generateScore(image1, image2):
    img1 = imageEncoder(image1)
    img2 = imageEncoder(image2)
    cos_scores = util.pytorch_cos_sim(img1, img2)
    score = round(float(cos_scores[0][0]) * 100, 2)
    return score
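
For reference, the scoring function works standalone like this (the image paths below are just placeholders):

from PIL import Image

# Minimal sanity check of generateScore outside the server;
# "example_a.jpg" / "example_b.jpg" are placeholder paths.
imgA = Image.open("example_a.jpg")
imgB = Image.open("example_b.jpg")
print(generateScore(imgA, imgB))  # prints a similarity percentage between 0 and 100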



import base64
import io

from fastapi import FastAPI, status
from fastapi.responses import JSONResponse
from PIL import Image
from pydantic import BaseModel

app = FastAPI()


class Imagesclass(BaseModel):
    f_base64: str  # first image, base64-encoded
    s_base64: str  # second image, base64-encoded


@app.post("/cmp")
async def createcmp(imageclass: Imagesclass):
    # Decode both base64 payloads into PIL images
    image_bytesone = base64.b64decode(imageclass.f_base64)
    pil_image1 = Image.open(io.BytesIO(image_bytesone))

    image_bytestwo = base64.b64decode(imageclass.s_base64)
    pil_image2 = Image.open(io.BytesIO(image_bytestwo))

    score = generateScore(pil_image1, pil_image2)

    return JSONResponse(status_code=status.HTTP_200_OK,
                        content={"status": "ok", "score": str(score)})
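
The endpoint is called with the two images base64-encoded in the JSON body, roughly like this (URL, port, and file names are just placeholders):

import base64
import requests

def to_b64(path):
    # Read an image file and return its base64-encoded string
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()

payload = {"f_base64": to_b64("a.jpg"), "s_base64": to_b64("b.jpg")}
resp = requests.post("http://localhost:8000/cmp", json=payload)
print(resp.json())  # e.g. {"status": "ok", "score": "..."}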

I really need help with this problem.
Thank you.

@ptrblck I need your help please.