Detectron model giving different results for different machines (constant seed)

My training script for the model:

seed = 42
import random
import os
import numpy as np
import torch

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # benchmark must be False for deterministic behavior

seed_everything(seed)


from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data.catalog import Metadata

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("experiment",)
cfg.DATASETS.TEST = ("test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.MODEL.DEVICE = "cuda"
cfg.SOLVER.IMS_PER_BATCH = 2
num_gpu = 1
bs = (num_gpu * 2)
cfg.SOLVER.BASE_LR = 0.02 * bs / 16
cfg.SOLVER.MAX_ITER = 7500   
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

My inference script on server-1 is:

import cv2
from detectron2.engine import DefaultPredictor

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

cfg.SEED = 42
predictor = DefaultPredictor(cfg)

img = cv2.imread('filename.jpg')
outputs = predictor(img)
print(outputs["instances"])

pred_classes = outputs['instances'].pred_classes.tolist()
classes = ["Handwritten", "Logo", "Markings", "Signature"]

for pred_class in pred_classes:
    print('*'*10)
    print(classes[pred_class])
    print('*'*10)

if any(classes[pred_class] == "Handwritten" for pred_class in pred_classes):
    print(True)
else:
    print(False)

My inference script on server-2 is:

import time
from typing import Any, Dict

import cv2
import numpy as np
from fastapi import FastAPI, File, Form, UploadFile
from requests.exceptions import HTTPError  # assumed source of the HTTPError caught below

from detectron2.config import get_cfg
from detectron2.data.catalog import Metadata
from detectron2.engine import DefaultPredictor


class Handwritten:
    """Detects handwritten pages in a PDF chart.

    Attributes
    ----------
    predictor : DefaultPredictor
        Predictor built from the trained weights.
    metadata : Metadata
        Class names for the four detected categories.
    """

    def __init__(self, path_of_weights: str) -> None:
        """Initialize the detector.

        Parameters
        ----------
        path_of_weights : str
            Path to the trained weights file.
        """
        self.cfg = get_cfg()
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
        self.cfg.MODEL.WEIGHTS = path_of_weights
        self.cfg.MODEL.DEVICE = "cpu"
        self.cfg.SEED = 42
        self.predictor = DefaultPredictor(self.cfg)
        self.metadata = Metadata()
        self.metadata.set(
            thing_classes=["Handwritten", "Logo", "Markings", "Signature"],
            thing_dataset_id_to_contiguous_id={0: 0, 1: 1, 2: 2, 3: 3},
        )

    def __call__(self, img: Any) -> Any:
        """Return the predicted output classes for the image."""
        self.outputs = self.predictor(img)
        return self.outputs["instances"]

    def detect_hw(self, image: Any) -> bool:
        """Detect a handwritten dx entity in the image and, if present, classify the page as handwritten.

        Parameters
        ----------
        image : Any
            Image matrix of a page.

        Returns
        -------
        bool
            True if the page is handwritten, False otherwise.
        """
        outputs = self.__call__(image)
        pred_classes = outputs.pred_classes.tolist()
        classes = ["Handwritten", "Logo", "Markings", "Signature"]

        if any(classes[pred_class] == "Handwritten" for pred_class in pred_classes):
            return True
        else:
            return False


app = FastAPI()
path_of_weights = "model/model_final.pth"
model = Handwritten(path_of_weights)

@app.post("/cv/predict", status_code=200)
def predict(
    page_no: int = Form(...), dimensions: list = Form(...), image: UploadFile = File(...)
) -> Dict[str, int]:
    """Predicts if image is handwritten page or not.
    Parameters
    ----------
    page_no : Page number of the given input page
    dimensions : Height and width of the page
    image : Image of the page as bytestream
    """
    image_bytes = image.file.read()
    decoded_image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), -1)
    height, width = int(dimensions[0]), int(dimensions[1])
    prediction_time = time.time()
    pg_image = cv2.resize(decoded_image, (width, height))  # cv2.resize expects (width, height)
    try:
        # Check if page is handwritten
        hw_result = model.detect_hw(pg_image)

        # If handwritten, consider for output
        if hw_result:
            hw_pages = page_no

        else:
            hw_pages = -99

        prediction_info = {
            "hw_pages": hw_pages,
            "prediction_time": prediction_time,
        }
        #_logger.info(f"prediction info: {prediction_info}")
    except HTTPError:
        # Error handling elided in the original post; re-raise so the endpoint fails loudly.
        raise
    
    return {"hw_pages": hw_pages}

While the model keeps giving good results on server-1, it is being very erratic on server-2. The weights and the seed are the same, yet I am unable to understand this difference in behavior between the two setups.

The model was trained on server-1.

Server-1 is a g4dn.2xlarge; server-2 is a g4dn.xlarge.

Am I doing something wrong?
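
For what it's worth, one way to confirm that both servers really load the same configuration and weights would be to fingerprint them on each machine and diff the output (just a sketch; the fingerprint helper and the dump filename are arbitrary, and cfg is whatever gets passed to DefaultPredictor):

import hashlib

def fingerprint(cfg, weights_path):
    # Hash of the weights file -- must be identical on both servers
    with open(weights_path, "rb") as f:
        weights_md5 = hashlib.md5(f.read()).hexdigest()
    # cfg.dump() returns the fully resolved config as YAML; diff the two dumps
    return weights_md5, cfg.dump()

md5sum, config_yaml = fingerprint(cfg, cfg.MODEL.WEIGHTS)
print(md5sum)
with open("cfg_dump.yaml", "w") as f:
    f.write(config_yaml)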

Please correct me if I’m wrong, but isn’t the main difference between g4dn.2xlarge and g4dn.xlarge the increase in vCPUs, RAM, storage, and network bandwidth?
Both should have the same T4 GPU, so unless the software stack differs I wouldn't know what causes the difference (assuming you are using the GPU).
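
If it helps, the relevant versions can be printed on both machines to compare the stacks directly (just a sketch, nothing here is specific to your setup):

import torch

print("torch:", torch.__version__)
print("CUDA build:", torch.version.cuda)
print("cuDNN:", torch.backends.cudnn.version())
print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "none (CPU mode)")

try:
    import detectron2
    print("detectron2:", detectron2.__version__)
except ImportError:
    print("detectron2 not importable")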

I’ve experienced issues with differing results on detectron2 as well: even using the PyTorch flags for deterministic training, in addition to setting the same random seed, didn’t fix it so far.
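
For reference, these are roughly the flags I mean (a sketch; torch.use_deterministic_algorithms needs a fairly recent PyTorch, and even then PyTorch only promises reproducibility on identical hardware and library versions, not across different machines):

import os
import random

import numpy as np
import torch

os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # required by some deterministic CUDA ops

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)  # raises on ops that lack a deterministic implementation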