Why the weight still change?

I came into sth that really strange, this is my code

def model_eval(model):
    model_the_wrong_images = []
    model_the_real_label = []
    model_the_pred = []
    correct = 0
    total = 0
    with torch.no_grad():  
        model.eval()
        for idx, test in enumerate(test_loader):

            image, label = test
            total += image.size()[0]

            pred = torch.argmax(model(image), 1)
            correct += (pred ==  label).sum().item()
        
            if idx == 4:
                #print(model(image))
                print(torch.argmax(model(image), 1))
                print(pred)
                print(torch.argmax(model(image), 1))
                print(pred == torch.argmax(model(image), 1))
                print(correct)
                print(label)

            for i in range(len(image)):
                if not (pred == label)[i].item():
                    model_the_wrong_images.append(image[i])
                    model_the_real_label.append(label[i])
                    model_the_pred.append(torch.argmax(model(image[i].unsqueeze_(0)), 1))

    print(correct)
    print(total)
    print('The accuracy on testset is : %f'%(correct / total))
    
    return model_the_wrong_images, model_the_real_label, model_the_pred

I write this func to eval my model, but i found that once i repeat the eval process with the model that has already been trained, the accuracy changes everytime.

Why my model’s params can’t keep const??? I wrote that model.eval(), but it seems no work

So i print the output and found the output change.Go futrher, i print the output of the 4th subset of the test process to see, something terrible happen, the outputs of the if are:

tensor([9, 7, 2, 4])
tensor([9, 7, 3, 4])
tensor([9, 7, 3, 4])
tensor([1, 1, 1, 1], dtype=torch.uint8)
17
tensor([9, 7, 3, 4])

How could the pred and torch.argmax(model(image), 1) become two different things?I get pred by pred = torch.argmax(model(image), 1)!

Who can tell me why! Thanks a lot!!!

Anyone Can help me?:joy::joy::joy::joy:

did you try setting torch.backends.cudnn.deterministic = True

Thanks!
I run the script on cpu. Do i need to do any change to the code?

Still no use…
So frustrated

I think the direct way to see whether the model changes is to print the parameters out.

As you suggest, i found the params seems keep const, but the eval process always get different result

Can you post the full code, so that I can see if you are using any layers that has some randomization, and you might also be using test time augmentation(TTA)

Set shuffle=False and no_flip or something if you add flipping as a kind of data augmentation.

import torch
import torchvision
transform = torchvision.transforms.Compose(
                    [torchvision.transforms.ToTensor(),
                     torchvision.transforms.Normalize([0.5, 0.5, 0.5],[0.5, 0.5, 0.5])])
trainset = torchvision.datasets.MNIST(download=True, root='./data/MNIST',
                                    train=True,transform=transform)
testset = torchvision.datasets.MNIST(download=True, root='./data/MNIST',
                                    train=False,transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=trainset, shuffle=False,
                            batch_size=64)
test_loader = torch.utils.data.DataLoader(dataset=testset, shuffle=False,
                            batch_size=4)
import torch.nn.functional as F
import torch.nn as nn

class Conv_Classifier(nn.Module):
    def __init__(self):
        super(Conv_Classifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 5, 5)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(5, 16, 5)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(256, 20)
        self.fc2 = nn.Linear(20, 10)

    def forward(self, x):
        x = F.relu(self.pool1((self.conv1(x))))
        x = F.relu(self.pool2((self.conv2(x))))
        x = F.dropout2d(x)
        x = x.view(-1, 256)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

Mnist_Classifier = Conv_Classifier()

def model_train(model, optimizer):
    total_loss = 0
    model.train()
    for i in range(5):

        for idx, data in enumerate(train_loader):
            image, label = data
        
            optimizer.zero_grad()
            pred = model(image)
            loss = loss_fn(pred, label)
            loss.backward()
            optimizer.step()
        
            total_loss += loss
            if idx % 149 == 0:
                print('[epoch %(epoch)d batch %(batch)d] loss : '%{'epoch':i, 'batch':idx}, loss)
        print('Total loss after %(epoch)d epoch'%{'epoch':i}, total_loss)
        total_loss=0

model_train(Mnist_Classifier, torch.optim.SGD(Mnist_Classifier.parameters(),lr=0.01, momentum=0.9))

the_wrong_images = []
the_real_label = []
the_pred = []

def model_eval(model):
    model_the_wrong_images = []
    model_the_real_label = []
    model_the_pred = []
    correct = 0
    total = 0
    with torch.no_grad():  
        model.eval()
        for idx, test in enumerate(test_loader):

            image, label = test
            total += image.size()[0]

            pred = torch.argmax(model(image), 1)
            correct += (pred ==  label).sum().item()
      
    
        
            for i in range(len(image)):
                if not (pred == label)[i].item():
                    model_the_wrong_images.append(image[i])
                    model_the_real_label.append(label[i])
                    model_the_pred.append(torch.argmax(model(image[i].unsqueeze_(0)), 1))

    print(correct)
    print(total)
    print('The accuracy on testset is : %f'%(correct / total))
    
    return model_the_wrong_images, model_the_real_label, model_the_pred

the_wrong_images, the_real_label, the_pred = model_eval(Mnist_Classifier)

It is just a simple example on Mnist, but if you repeat the model_eval func, you will find the accuracy will change…

Since you are using the functional API for F.dropout, it will be always set to training, i.e. it’ll always drop your activations.
You could use the module API nn.Dropout(p=0.5) or alternatively pass the self.training flag to your current code:

x = F.dropout(x, training=self.training)
5 Likes

I did, but the output will change even for the same input

My code is below, so sad

Thank you!
My problem is solved
Really a foolish mistake
Wish u a good day!

1 Like