Accuracy decreases after saving and loading

I am training a simple binary classifier. During training, my validation accuracy reaches 80%, but when I save the model, load it again and evaluate it, the accuracy drops to 60% and the loss also increases.

Here is the code

class Classifier(object):

    def __init__(self, classifier, model_name, loss, optim_kwargs, dataset, batch_size, model_dataset,
                 optim=None, epochs=10, tf_log_path=None, use_cuda=None, classifier_params=None):
        """
        :param classifier: Classifier Network
        :param model_name: Model Name
        :param recon_loss: Reconstruction Loss
        :param dataset: Dataset
        :param batch_size: Batch Size
        :param optim: Optimizer
        :param lr: Learning Rate
        :param epochs: Number of epochs
        :param tf_log_path: Tensorflow Log Path
        """
        self.classifier = classifier
        if classifier_params is None or not len(classifier_params):
            classifier_params = filter(lambda x: x.requires_grad, self.classifier.parameters())
            params = sum([np.prod(p.size()) for p in self.classifier.parameters()])
            rootLogger.info("Trainable Parameters : " + str(params))
        optim = optim or torch.optim.Adam
        optim_kwargs = optim_kwargs or {}
        optim_kwargs.setdefault('lr', 1e-3)
        self.optim = optim(classifier_params, **optim_kwargs)
        self.epochs = epochs
        self.loss = loss
        self.model_name = model_name
        self.dataset = dataset
        self.model_dataset = model_dataset
        self.batch_size = batch_size
        self.use_cuda = use_cuda
        self.scheduler = StepLR(self.optim, step_size=10, gamma=0.5)
        if self.use_cuda is None:
            self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.classifier.cuda()
        if tf_log_path is not None:
            self.logger = Logger(model_name=self.model_name, data_name=self.dataset, log_path=tf_log_path)

    def train_classifier(self, train_loader, val_loader, model_path):
        """
        Function for training
        :param train_loader: Loader for training data
        :param val_loader: Loader for test data
        :param model_path: Path for saving the data
        """

        # results save folder
        model_current = model_path + '/classifier/' + self.dataset + '/current/' + self.model_name + '.pt'
        model_best = model_path + '/classifier/' + self.dataset + '/best/' + self.model_name + '.pt'
        try:
            rootLogger.info("Loading Saved Model")
            checkpoint = torch.load(model_current)
            self.classifier.load_state_dict(checkpoint)
            rootLogger.info("Saved Model successfully loaded")
        except:
            rootLogger.info("Model not found, Created a new one")
            # Make directory for Saving Models
            if not os.path.isdir(model_path + 'classifier/' + self.dataset + '/current/'):
                makedirs(model_path + 'classifier/' + self.dataset + '/current/')
            if not os.path.isdir(model_path + 'classifier/' + self.dataset + '/best/'):
                makedirs(model_path + 'classifier/' + self.dataset + '/best/')

        # training-loop
        np.random.seed(int(time.time()))
        rootLogger.info('Training Start!!!')

        best_loss = np.Inf
        best_accuracy = 0.0

        for epoch in range(self.epochs):
            self.classifier.train()
            epoch_start_time = time.time()
            epoch_train_loss = 0.
            correct = 0
            total = 0

            # Checkpoint after 5 epochs
            if epoch % 10 == 0:
                try:
                    rootLogger.info("Saving the current model")
                    torch.save(self.classifier.state_dict(), model_current)
                    rootLogger.info("Current model saved")
                except:
                    rootLogger.info("Can't save the model")

            for epoch_iter, data in enumerate(train_loader):
                input_image, labels = data
                # Move the images to the device first before computation
                if self.use_cuda:
                    input_image = input_image.cuda()
                    labels = labels.cuda()
                input_image = Variable(input_image)
                labels = Variable(labels)

                self.optim.zero_grad()  # clear gradients for this training step

                pred_labels = self.classifier(input_image)
                loss = self.loss(pred_labels, labels)
                epoch_train_loss += loss.item()

                loss.backward()  # backpropagation, compute gradients
                self.optim.step()  # apply gradients

                _, predicted = torch.max(pred_labels.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            avg_loss = epoch_train_loss / len(train_loader)

            # Calculate accuracy for current epoch
            accuracy = 100 * correct / total

            # Log the training losses and accuracy
            self.logger.log(mode="train", error=avg_loss, epoch=epoch + 1, n_batch=0, num_batches=1)
            self.logger.log_scores(mode="train", acc=accuracy, epoch=epoch + 1)

            epoch_end_time = time.time()
            per_epoch_ptime = epoch_end_time - epoch_start_time
            rootLogger.info('[%d/%d] - ptime: %.2f Train loss: %.3f  Train Acc: %.2f' % (
                (epoch + 1), self.epochs, per_epoch_ptime, avg_loss, accuracy))

            # Validation
            val_loss, val_acc = self.evaluate_val_data_tf(val_loader=val_loader, epoch=epoch)
            # Save the best model
            if val_acc > best_accuracy:
                self.classifier.train()
                best_loss = val_loss
                best_accuracy = val_acc
                torch.save(self.classifier.state_dict(), model_best)
                rootLogger.info("Best model saved/updated..")
            self.scheduler.step()

    def evaluate_val_data_tf(self, val_loader, epoch):
        """
        Function to evaluate the results on trained model
        :param val_loader: data loader on which clustering is evaluated
        :param epoch:
        :return: None
        """
        self.classifier.eval()
        # Load the parameters of pretrained model
        # checkpoint = torch.load(model_name)
        # Evaluate the results on current model
        epoch_train_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for epoch_iter, data in enumerate(val_loader):
                latent_space, labels = data
                if self.use_cuda:
                    latent_space = latent_space.cuda()
                    labels = labels.cuda()
                latent_space = Variable(latent_space)
                labels = Variable(labels)

                pred_labels = self.classifier(latent_space)

                _, predicted = torch.max(pred_labels, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = self.loss(pred_labels, labels)
                epoch_train_loss += loss.item()

            avg_loss = epoch_train_loss / len(val_loader)
            accuracy = 100 * correct / total

            rootLogger.info("Val loss = [%.3f], Val Acc = [%.2f]" % (avg_loss, accuracy))
            self.logger.log(mode="val", error=avg_loss, epoch=epoch, n_batch=0, num_batches=1)
            self.logger.log_scores(mode="val", acc=accuracy, epoch=epoch + 1)

        return avg_loss, accuracy

    def evaluate_val_data(self, val_loader, model_path):
        """
        Function to evaluate the results on trained model
        :param val_loader: data loader on which clustering is evaluated
        :param model_path: name with which pre-trained model is saved
        :return: None
        """

        try:
            rootLogger.info("Loading Saved Model")
            checkpoint = torch.load(
            model_path + '/classifier/' + self.model_dataset + '/best/' + self.model_name + '.pt')
            self.classifier.load_state_dict(checkpoint)
            rootLogger.info("Saved Model successfully loaded")
        except:
            rootLogger.info("Model not found.")

        train_loss = 0
        correct = 0
        total = 0

        classes = ('orig', 'f2f')

        class_correct = list(0. for i in range(2))
        class_total = list(0. for i in range(2))
        self.classifier.eval()
        with torch.no_grad():
            for epoch_iter, data in enumerate(val_loader):
                image, labels = data
                if self.use_cuda:
                    image = image.cuda()
                    labels = labels.cuda()
                image = Variable(image)
                labels = Variable(labels)

                pred_labels = self.classifier(image)

                _, predicted = torch.max(pred_labels, 1)
                c = (predicted == labels).squeeze()

                for i in range(labels.shape[0]):
                    label = labels[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = self.loss(pred_labels, labels)
                train_loss += loss.item()

            avg_loss = train_loss / len(val_loader)
            accuracy = 100.0 * correct / total

            rootLogger.info("Val loss = [%.3f], Val Acc = [%.2f]" % (avg_loss, accuracy))

            for i in range(2):
                print('Accuracy of %5s : %2d %%' % (
                    classes[i], 100 * class_correct[i] / class_total[i]))

And the network looks like this:


class Classifier3(BaseClassifier):
    """
    Test Classifier 3

    Six Conv2d / activation / BatchNorm2d / MaxPool2d stages followed by a
    four-layer fully connected head that outputs two class probabilities.
    """

    def __init__(self, *args, **kwargs):
        super(Classifier3, self).__init__(*args, **kwargs)

    def init(self):
        """Apply Xavier-normal initialisation to every conv, transposed-conv and linear weight."""
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.xavier_normal_(layer.weight.data)

    def make_classifier(self):
        """
        Build the layer stack of the network.

        :return: nn.Sequential holding the convolutional stages and the classification head
        """
        # NOTE(review): return_indices=True makes each MaxPool2d return (output, indices).
        # A plain nn.Sequential forward would pass that tuple to the next layer, so
        # presumably BaseClassifier unpacks it - confirm against BaseClassifier.forward.
        return nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=(3, 3), stride=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm2d(8),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            # This stage uses a plain ReLU; all other stages use LeakyReLU(0.1).
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), stride=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True),

            Flatten(),
            nn.Dropout(0.5),
            nn.Linear(in_features=1024, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Dropout(0.5),
            nn.Linear(in_features=256, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Dropout(0.5),
            nn.Linear(in_features=128, out_features=64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Dropout(0.5),
            nn.Linear(in_features=64, out_features=2),
            # Explicit class dimension: the implicit choice is deprecated, and dim=1 is
            # what it resolves to for a (batch, classes) input.
            # NOTE(review): if the configured loss is nn.CrossEntropyLoss, which applies
            # log-softmax itself, this Softmax should be dropped - confirm against the loss.
            nn.Softmax(dim=1)
        )

That is a lot of code (I only skimmed it). My guess is that you aren’t saving the optimizer’s state_dict.
Adam uses running estimates. If you don’t save its state dict, they are re-initialized when you reload the model, and a single backward step can then lead to a wrong point.

I am not quite sure what you are trying to say. Could you please elaborate? Sorry for the cluttered code. Let me know if you don’t understand anything. I have been stuck on this issue for days.

Adam and other optimizers have “parameters”. These parameters are crucial for guiding backpropagation and the gradients, as momentum does in SGD.

If you don’t save the optimizer’s state dict, these parameters are wrong once you reload the model, so the next backprop step points in the wrong direction and moves far away from the previous point.

Is it possible?

But does it even matter? During validation I am not making any updates to the network; only the forward pass is made. Correct me if I am wrong.

Hmm, I see. I have no clue. Are you using the same batch size?
You can try comparing whether the loaded state dict equals the one that gives the good accuracy.

You may have a bug choosing when to save / from where to load.

Have a look at this:

Check that batchnorm is behaving correctly (for example, that the model is in eval mode whenever you evaluate it).