Network produces different output for the same image from different DataLoaders

Hello,
I am encountering a strange problem when testing my model. I have two datasets, and dataset 1 is a subset of dataset 2. When I take an image that exists in both datasets and pass it through the network, the network produces different outputs, and therefore a different classification, depending on which DataLoader I load it from. I have checked that the tensors for the image are equal in both datasets. In addition, I set model.eval(), set track_running_stats to False, and set the momentum of the batch norm layers to 0. I have no idea why the network outputs different results when given the same tensor. For context, my network is VGG-19; the smaller dataset 1 is CIFAR-10, and the larger dataset 2 is a custom PyTorch Dataset I created that includes all of the CIFAR-10 images.

This is an example of the differing outputs for one single image.


[screenshot: the same image gets different output logits and predicted classes from the two DataLoaders]

The code for outputting these results is here:

import os, sys
sys.path.append(os.getcwd())

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from tqdm import tqdm

from config import Config # pylint: disable=no-name-in-module
from test import test_model
import utils
from myclasses import CIFAR10Noise

from os.path import join as pathjoin
import shutil, argparse, datetime, json
from IPython import embed
from pynvml import *
from random import randrange
from copy import deepcopy
from collections import OrderedDict


def imshow(img):
    img = img / 5 + 0.48     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

####################

conf = Config(run_number=1)
net, device, handle = utils.prepare_net(conf.net(), conf.use_gpu)
model = (torch.load('/model_path', map_location=torch.device('cpu')))

# if running on CPU
if (device == 'cpu'):
    new_state_dict = OrderedDict()
    for k, v in model.items():
        name = k[7:] # remove the 'module.' prefix added by DataParallel
        new_state_dict[name]=v
    net.load_state_dict(new_state_dict) # load model

    # turn off tracking running means in batch norm layers
    for child in net.children():
        if type(child) is (torch.nn.modules.container.Sequential):
            for layer in child:
                if isinstance(layer, nn.BatchNorm2d):
                    layer.track_running_stats = False
                    layer.momentum = 0
# if running on GPU
else:
    net.load_state_dict(model)# load model

    # turn off tracking running means in batch norm layer
    for child in net.module.children():
        if type(child) is (torch.nn.modules.container.Sequential):
            for layer in child:
                if isinstance(layer, nn.BatchNorm2d):
                    layer.track_running_stats = False
                    layer.momentum = 0

train_set = conf.dataset(train=True, transform=conf.train_transform)
train_set2 = CIFAR10Noise()

train_loader = DataLoader(train_set, batch_size=1500, shuffle=False, num_workers=conf.test_provider_count)
train_loader2 = DataLoader(train_set2, batch_size=1500, shuffle=False, num_workers=conf.test_provider_count)

# turn model to evaluation mode
net.eval()

print('check track_running_stats before forward passes:')
for child in net.children():
    if type(child) is (torch.nn.modules.container.Sequential):
        for layer in child:
            if isinstance(layer, nn.BatchNorm2d):
                print(layer.track_running_stats)
                print(layer.momentum)

print('check batch norm weights before forward passes:')
for child in net.children():
    if type(child) is (torch.nn.modules.container.Sequential):
        print(child[1].weight)

# find matching pictures in each dataset
with torch.no_grad():
    
    # iterate through train set 1
    for data in train_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)

        # iterate through images in train set 1
        for i, img in enumerate(images):

            # for each image, iterate through train set 2
            for data2 in train_loader2:
                images2, labels2 = data2
                outputs2 = net(images2)
                _, predicted2 = torch.max(outputs2.data, 1)

                # iterate through images in train set 2
                for j, img2 in enumerate(images2):

                    # if image in train set 2 matches image from train set 1,
                    # print image and outputs
                    if torch.equal(img, img2):
                        print('image from original train set:')
                        imshow(images[i])
                        print('image from modified train set:')
                        imshow(images2[j])
                        print('prediction from original train set:')
                        print(classes[predicted[i].item()])
                        print('prediction from modified train set:')
                        print(classes[predicted2[j].item()])
                        print('output from original train set:')
                        print(outputs[i])
                        print('output from modified train set:')
                        print(outputs2[j])
                        print('ground truth label from original train set:')
                        print(classes[labels[i].item()])
                        print('ground truth label from modified train set:')
                        print(classes[labels2[j].item()])

                        # check to see if batch norm states have changed
                        print('track_running_stats after forward pass:')
                        for child in net.children():
                            if type(child) is (torch.nn.modules.container.Sequential):
                                for layer in child:
                                    if isinstance(layer, nn.BatchNorm2d):
                                        print(layer.track_running_stats)
                                        print(layer.momentum)

                        # check that batch norm weights have not changed
                        print('batch norm weights after forward pass:')
                        for child in net.children():
                            if type(child) is (torch.nn.modules.container.Sequential):
                                print(child[1].weight)

Sorry if my question is a bit simple; I am a beginner and would appreciate any help I can get! Please let me know if more information is needed. Thank you!!

I would recommend sampling the image from both DataLoaders and comparing the output values.
If dataset1 is a subset of dataset2, the absolute error should be zero, since the same image would be loaded and processed in the same way (assuming you are not using random transformations).
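For example, a minimal sketch along these lines (dataset1, dataset2, and model are placeholders for your datasets and network, idx1/idx2 are the hypothetical indices of the same image in each dataset, and both datasets are assumed to return image tensors):

import torch

idx1, idx2 = 0, 0  # hypothetical indices of the matching image

img1, _ = dataset1[idx1]
img2, _ = dataset2[idx2]

# the raw input tensors should match exactly
print('max abs input difference:', (img1 - img2).abs().max().item())

model.eval()
with torch.no_grad():
    out1 = model(img1.unsqueeze(0))  # add a batch dimension
    out2 = model(img2.unsqueeze(0))

# for equal inputs and a deterministic model this should be 0
print('max abs output difference:', (out1 - out2).abs().max().item())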

The implementations of conf.dataset and CIFAR10Noise are not posted, so we cannot see how the samples are created.
Feel free to post the code for both, so that we can have a look at them. :wink:

Thank you so much for your reply! In the code from my original post, I tested whether the image tensors were equal using torch.equal(img1, img2). I have also printed them out and checked them manually before, so that is not the problem.
conf.dataset is just the torchvision CIFAR10 dataset, and CIFAR10Noise is a custom PyTorch Dataset I created. This is the code I ran to create the dataset and store it as a pickled list:

import pickle
from random import randrange

import numpy as np
import torch
import torchvision

def noisy(image):
    # add zero-mean Gaussian noise with std 0.5 to the image
    row, col, ch = image.shape
    gauss = np.random.normal(0, 0.5, (row, col, ch))
    return image + gauss

train_set = torchvision.datasets.CIFAR10('/data_root')  # root argument is required; path is a placeholder

dataList = []
for i, entry in enumerate(train_set):
    dataList.append(list(entry)) 

newList = []
for i, entry in enumerate(dataList):
    newEntry = []
    newEntry.append(torch.tensor(noisy(entry[0])).float())  # noisy image tensor
    newEntry.append(randrange(10))  # random label for the noisy copy
    newList.append(newEntry)
    del newEntry

newList = newList + dataList

with open('/save_location', 'wb') as f:
    pickle.dump(newList, f)

And this is the code for the CIFAR10Noise Dataset class itself:

import pickle

from torch.utils.data import Dataset

class CIFAR10Noise(Dataset):

    def __init__(self):
        # load the pickled list of [image, label] entries created above
        with open('/save_location', 'rb') as f:
            self.dataList = pickle.load(f)

    def __len__(self):
        return len(self.dataList)

    def __getitem__(self, idx):
        return tuple(self.dataList[idx])

I do not think it is a problem with the dataset itself: I isolated the subset from CIFAR10Noise and built another custom Dataset the same way by copying over the exact tensors, and the network still produces different outputs. I can post that code as well if needed.

If the inputs are equal, you’ve called model.eval(), and the output is still different, you could check the layers one by one using forward hooks as described here.
This approach would allow you to isolate which layer creates the first difference and would help debug this issue further.
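A rough sketch of this idea (model, batch1, and batch2 stand in for your network and the two batches that contain the matching image; i and j are its indices within each batch):

import torch

activations = {}

def make_hook(name):
    def hook(module, input, output):
        # store a copy of this layer's output under its name
        activations[name] = output.detach().clone()
    return hook

# register a forward hook on every leaf module
for name, module in model.named_modules():
    if len(list(module.children())) == 0:
        module.register_forward_hook(make_hook(name))

model.eval()
with torch.no_grad():
    model(batch1)
    acts1 = dict(activations)
    model(batch2)
    acts2 = dict(activations)

# print the per-layer max abs difference for the matching sample;
# the first nonzero entry is the layer that introduces the mismatch
for name in acts1:
    diff = (acts1[name][i] - acts2[name][j]).abs().max()
    print(name, diff.item())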

Okay, so I put a forward hook on the first layer (Conv2d) and the inputs and outputs were the same. I then tried it on the second layer (BatchNorm2d): the inputs were the same, but the outputs were different. However, I have checked that all the parameters in the state dict are the same, which I think covers the weights and biases. I have no idea why the batch norm outputs are different; what else could be causing this difference? Or perhaps my code for checking the parameters is incorrect. My code is here:

#################

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader

from config import Config # pylint: disable=no-name-in-module
import utils
from myclasses import CIFAR10Noise

from IPython import embed
from pynvml import *
from random import randrange
from copy import deepcopy
from collections import OrderedDict

def compare_models(model_1, model_2):
    models_differ = 0
    for key_item_1, key_item_2 in zip(model_1.state_dict().items(), model_2.state_dict().items()):
        if torch.equal(key_item_1[1], key_item_2[1]):
            pass
        else:
            models_differ += 1
            if (key_item_1[0] == key_item_2[0]):
                print('Mismatch found at', key_item_1[0])
            else:
                raise Exception('state dict keys do not match: ' + key_item_1[0] + ' vs ' + key_item_2[0])
    if models_differ == 0:
        print('Models match perfectly! :)')

# A simple hook class that stores the input and output of a layer during the forward/backward pass
class Hook():
    def __init__(self, module, backward=False):
        if backward == False:
            self.hook = module.register_forward_hook(self.hook_fn)
        else:
            self.hook = module.register_full_backward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output

    def close(self):
        self.hook.remove()


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

####################

conf = Config(run_number=1)
net, device, handle = utils.prepare_net(conf.net(), conf.use_gpu)
model = (torch.load('/model_path', map_location=torch.device('cpu')))

# if running on CPU
if (device == 'cpu'):
    new_state_dict = OrderedDict()
    for k, v in model.items():
        name = k[7:] # remove the 'module.' prefix added by DataParallel
        new_state_dict[name]=v
    net.load_state_dict(new_state_dict) # load model

    # turn off tracking running means in batch norm layers
    for child in net.children():
        if type(child) is (torch.nn.modules.container.Sequential):
            for layer in child:
                if isinstance(layer, nn.BatchNorm2d):
                    layer.track_running_stats = False
                    layer.momentum = 0
# if running on GPU
else:
    net.load_state_dict(model)# load model

    # turn off tracking running means in batch norm layer
    for child in net.module.children():
        if type(child) is (torch.nn.modules.container.Sequential):
            for layer in child:
                if isinstance(layer, nn.BatchNorm2d):
                    layer.track_running_stats = False
                    layer.momentum = 0

# small data set (subset of larger set)
train_set = torchvision.datasets.CIFAR10('/data_root')  # root argument is required; path is a placeholder
# large data set
train_set2 = CIFAR10Noise()


train_loader = DataLoader(train_set, batch_size=1500, shuffle=False, num_workers=conf.test_provider_count)
train_loader2 = DataLoader(train_set2, batch_size=1500, shuffle=False, num_workers=conf.test_provider_count)

# turn model to evaluation mode
net.eval()

# set hook on first batchnorm layer: features[1]
if (device == 'cpu'):
    hookF = Hook(net.features[1])
else:
    hookF = Hook(net.module.features[1])


"""
With shuffle=False in both DataLoaders, the matching images are in fixed locations.

Train loader 1 batch number : 0
i =  0

Train loader 2 batch number:  37
j =  166
"""

# copy net before evaluation so I can compare parameters after forward passes, 
# to see if any parameters changed
net2 = deepcopy(net)

# find matching images and save the hook inputs and outputs of their 
# respective batches
with torch.no_grad():
    
    # get batch with desired image from first dataset
    for batchNum1, data in enumerate(train_loader):
        if (batchNum1 == 0):
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)

            # forward hook on batchnorm layer
            hookInput1 = hookF.input[0]
            hookOutput1 = hookF.output
            break

    print('See if model changed after train loader 1 forward pass')
    compare_models(net, net2)

    # get batch with desired image from second dataset
    for batchNum2, data2 in enumerate(train_loader2):
        if (batchNum2 == 37):
            images2, labels2 = data2
            outputs2 = net(images2)
            _, predicted2 = torch.max(outputs2.data, 1)

            # forward hook on batchnorm layer
            hookInput2 = hookF.input[0]
            hookOutput2 = hookF.output
            break
    
    print('See if model changed after train loader 2 forward passes')
    compare_models(net, net2)
    
    # indices of the desired matching image
    i = 0
    j = 166

    # store batch norm layer inputs and outputs from first dataset batch
    in1 = hookInput1[i]
    out1 = hookOutput1[i]

    # store batch norm layer inputs and outputs from second dataset batch
    in2 = hookInput2[j]
    out2 = hookOutput2[j]

    if torch.equal(in1, in2):
        print('batch norm inputs are equal')
    else:
        print('batch norm inputs not equal')

    if torch.equal(out1, out2):
        print('batch norm outputs are equal')
    else:
        print('batch norm outputs not equal')

And the output from this is:

See if model changed after train loader 1 forward pass
Models match perfectly! :)
See if model changed after train loader 2 forward passes
Models match perfectly! :)
batch norm inputs are equal
batch norm outputs not equal

By the way, I checked and the running mean and variance are the same before and after the forward passes.

Could you check the max. abs. difference between the batch norm outputs and post it here, please?
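E.g. something like this, using in1/in2 and out1/out2 from your hook code:

print('max abs input difference:', (in1 - in2).abs().max().item())
print('max abs output difference:', (out1 - out2).abs().max().item())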