Test result is slightly different on every run


import os
import time
import json
import argparse
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torchvision.transforms import Normalize
from sklearn.metrics import confusion_matrix, f1_score

def compute_iou(y_pred, y_true):
    # y_pred and y_true are binary masks with values in {0, 255}
    y_pred = y_pred.flatten()
    y_true = y_true.flatten()
    current = confusion_matrix(y_true, y_pred, labels=[0, 255])
    # mean IoU computed from the confusion matrix
    intersection = np.diag(current)
    ground_truth_set = current.sum(axis=1)
    predicted_set = current.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection
    IoU = intersection / union.astype(np.float32)
    wall, crack = IoU  # per-class IoU (background, crack); currently unused
    return np.mean(IoU)

def preprocess(image):
    input_size= 512
    image = cv2.resize(image, (input_size,input_size), interpolation=cv2.INTER_LINEAR)
    #image = image.astype(np.float32) / 255.0
    #image = (image - mean) / std
    X = np.transpose(image, axes=(2, 0, 1))
    X = np.expand_dims(X, axis=0)
    X = torch.tensor(X, dtype=torch.float32)
    return X

from mobilenetv3_encoder4 import MobileNetV3
if __name__ == '__main__':

    path=os.getcwd()
    test_path=path+"/test/test_image_1/"
    mask_path=path+"/test/test_mask_1/"
    data_list=os.listdir(test_path)

    model_path=path+"/models/double_loss/"
    model_list=os.listdir(model_path)
    model = MobileNetV3()


    batch = torch.FloatTensor(1, 3, 1024, 512)

    #################### common model setting and opt setting  #######################################
    for j in model_list:
        iou_list = []
        if torch.cuda.device_count() > 0:
            model.load_state_dict(torch.load(model_path + j))
        else:
            model.load_state_dict(torch.load(model_path + j, map_location="cpu"))

        use_cuda = torch.cuda.is_available()
        num_gpu = torch.cuda.device_count()

        if use_cuda:
            print("Use gpu : %d" % torch.cuda.device_count())
            #if num_gpu > 1:
            #    model = torch.nn.DataParallel(model)
            #    print("make DataParallel")
            model = model.cuda()
            print("Done")
        imgl=[]
#        model.eval()
        ###################################stage Enc setting ##############################################
        for i in data_list:

            img_name = i
            i2=i[:-4]

            
            imgW, imgH = 1024, 512
            img = cv2.imread(test_path+img_name)
            (h, w) = img.shape[:2]
            img_orig = np.copy(img)


            img = img.astype(np.float32)
            #for k in range(3):
            #    img[:, :, k] -= mean[k]
            #for j in range(3):
            #    img[:, :, k] /= std[k]

            img /= 255
            img = img.transpose((2, 0, 1))
            img_tensor = torch.from_numpy(img)
            norm=Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

            img_tensor=norm(img_tensor)
            img_tensor = torch.unsqueeze(img_tensor, 0)  # add a batch dimension
            with torch.no_grad():
                img_variable = torch.autograd.Variable(img_tensor)

                if torch.cuda.is_available():
                    img_variable = img_variable.cuda()
                    #img_variable2 = img_variable2.cuda()

                img_coarse,img_out = model(img_variable)

            Lovasz=True
            if Lovasz:
                classMap_numpy = (img_out[0].data.cpu() > 0.001).numpy()[0]
                classMap_numpy2 = (img_out[0].data.cpu() > 0).numpy()[0]
                classMap_numpy3= (img_coarse[0].data.cpu() > 0).numpy()[0]

            else:
                classMap_numpy = img_out[0].max(0)[1].byte().data.cpu().numpy()


            idx_fg = (classMap_numpy == 1).astype(np.float32)
            idx_fg2 = (classMap_numpy2 == 1).astype(np.float32)
            idx_fg3 = (classMap_numpy3 == 1).astype(np.float32)
            #idx=idx_fg2+idx_fg
            mask = idx_fg.astype(np.uint8)*255
            mask2 = idx_fg2.astype(np.uint8)*255
            mask3 = idx_fg3.astype(np.uint8)*255
            cv2.imwrite("./data/mask1/"+"%s.png"%i2,mask)
            cv2.imwrite("./data/mask2/"+"%s.png"%i2,mask2)
            cv2.imwrite("./data/mask5/"+"%s.png"%i2,mask3)
            #mask = idx.astype(np.uint8)
            #mask_list=np.unique(mask)
            #for k in mask_list:
            #    if k>0:
            #        mask[mask==k]=255
            blur = cv2.GaussianBlur(mask, (5, 5), 0)  # currently unused; Otsu is applied to the raw mask below
            ret3, th3 = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            
            cv2.imwrite("./data/mask3/"+"%s.png"%i2,th3)
        
            #ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

            nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(th3, connectivity=8)

            # drop the background component (label 0) from the stats
            sizes = stats[1:, -1]
            nb_components = nb_components - 1

            min_size = 50

            # keep only connected components that are at least min_size pixels
            img2 = np.zeros(output.shape, dtype=np.uint8)
            for k in range(nb_components):
                if sizes[k] >= min_size:
                    img2[output == k + 1] = 255

            cv2.imwrite("./data/mask4/"+"%s.png"%i2,img2)

            #idx_fg= cv2.resize(idx_fg, (w, h))
            #cv2.imwrite("mask_"+img_name, idx_fg.astype(np.uint8)*255)
            gt= cv2.imread(mask_path+i2+".png")[:,:,0]
            #gt=cv2.resize(gt,(1024,512),interpolation=cv2.INTER_NEAREST)
            blur2 = cv2.GaussianBlur(gt,(5,5),0)

            ret3,c = cv2.threshold(blur2,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
            cr = compute_iou(img2, c)  # IoU between prediction and Otsu-thresholded ground truth
            iou_list.append(cr)
            imgl.append(i2)
        miou = np.mean(iou_list)
        print(j, miou)

Hello. The code above is my test code for semantic segmentation.
My problem is that whenever I run the test with the same model, the result is slightly different.
I am not sure what a good solution would be.
Another concern is whether I should use np.copy() whenever I modify something.
When I was learning OpenCV, I remember that we should use copy(), because otherwise the operation changes the original object.
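
For example, this is the behaviour I mean (just a small standalone sketch, not part of the code above):

import numpy as np

img = np.zeros((4, 4), dtype=np.uint8)

alias = img          # no copy: both names refer to the same array
alias[0, 0] = 255
print(img[0, 0])     # 255 -> the original array changed as well

safe = np.copy(img)  # independent copy, like img_orig = np.copy(img) in my code
safe[1, 1] = 255
print(img[1, 1])     # 0 -> the original array is untouched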

Thank you.

It seems you’ve removed the model.eval() call, which would disable dropout and use the running stats in batchnorm layers instead of the sample statistics.
Could you call model.eval() again and check the results?
Also, how large are the differences between the runs?
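
For reference, this is roughly what I mean (a minimal sketch using the names from your script, not a drop-in replacement):

model = MobileNetV3()
model.load_state_dict(torch.load(model_path + j, map_location="cpu"))
model.eval()               # switch dropout/batchnorm to inference behaviour

with torch.no_grad():      # no gradients are tracked during the forward pass
    img_coarse, img_out = model(img_tensor)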

The original author of this model wrote this test code; I only changed minor things such as the Otsu thresholding.

with torch.no_grad():
    img_variable = torch.autograd.Variable(img_tensor)

    if torch.cuda.is_available():
        img_variable = img_variable.cuda()

So I am already using this part.
I will also try calling eval() again; eval() actually decreases the result a little bit.
The difference between runs is around 0.0005 average IoU.
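
I am also going to try forcing deterministic behaviour before the test loop, something like this (just an idea I want to check, not from the original code):

import random

torch.manual_seed(0)
np.random.seed(0)
random.seed(0)
torch.backends.cudnn.deterministic = True   # use deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False      # do not auto-tune kernel selection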
Thank you.