I have written an autoencoder using PyTorch and wrapped it in a custom sklearn BaseEstimator. I normally train the estimator on a machine with a GPU and save it for later evaluation using pickle. If I try to load the estimator on a CPU-only machine when it was saved while the model was on the GPU, I get the following error:
RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location='cpu' to map your storages to the CPU.
- Is there a way to force the PyTorch model to be moved to the CPU before pickling the estimator, without an explicit call?
- Is there a way to unpickle an estimator that was saved while the model was on the GPU?
The following is an example of my PyTorch model and sklearn-compatible estimator, along with an example of how I am trying to save and load my models.
PyTorch Model
import torch.nn as nn

class _AutoEncoder(nn.Module):
    def __init__(self, input_dim, output_dim, encoder_dim=4):
        super(_AutoEncoder, self).__init__()
        hidden_dim = int((input_dim + encoder_dim) / 2)

        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.Linear(hidden_dim, encoder_dim))
        self.encoder = nn.Sequential(*layers)

        layers = []
        layers.append(nn.Linear(encoder_dim, hidden_dim))
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.decoder = nn.Sequential(*layers)

    def forward(self, X):
        return self.decoder(self.encoder(X))
sklearn-Compatible Estimator
import warnings
import inspect
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn import utils as sk_utils
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data

class AutoEncoder(BaseEstimator, TransformerMixin):
    def __init__(
        self,
        encoder_dim=4,
        n_epochs=200,
        batch_size=None,
        shuffle=True,
        use_cuda=False
    ):
        super(AutoEncoder, self).__init__()

        # store all constructor arguments as attributes (sklearn convention)
        args, _, _, values = inspect.getargvalues(inspect.currentframe())
        values.pop("self")
        for arg, val in values.items():
            setattr(self, arg, val)

        if use_cuda:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
            else:
                self.device = torch.device("cpu")
                warnings.warn("cuda not available", UserWarning)
        else:
            self.device = torch.device("cpu")
    def fit(self, X, y=None):
        # X, y = sk_utils.check_X_y(X, y, ensure_2d=False, allow_nd=True)
        self._model = self._train_classifier(X, y)
        return self

    def transform(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.forward(X)
        return output.cpu().numpy()

    def encode(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.encoder(X)
        return output.cpu().numpy()

    def decode(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.decoder(X)
        return output.cpu().numpy()
    def _train_classifier(self, x_train, y_train):
        x_train = torch.from_numpy(x_train.astype(np.float32)).to(self.device)
        y_train = torch.from_numpy(y_train.astype(np.float32)).to(self.device)

        input_dim = x_train.shape[-1]
        output_dim = y_train.shape[-1]

        model = _AutoEncoder(input_dim, output_dim, encoder_dim=self.encoder_dim).to(self.device)
        loss_function = nn.MSELoss()
        optimizer = optim.Adam(model.parameters())
        print(model)

        if self.batch_size is None:
            return self._batch_train_simple_classifier(x_train, y_train, model, loss_function, optimizer)
        else:
            return self._minibatch_train_simple_classifier(x_train, y_train, model, loss_function, optimizer)
    def _batch_train_simple_classifier(self, x_train, y_train, model, loss_function, optimizer):
        for epoch in range(1, self.n_epochs + 1):
            model.train()
            optimizer.zero_grad()
            outputs = model.forward(x_train)
            loss = loss_function(outputs, y_train)
            loss.backward()
            optimizer.step()

            if epoch % 10 == 0 or epoch == self.n_epochs:
                message = "Train Epoch: {:5d}, Loss: {:15.6f}".format(
                    epoch,
                    loss.item()
                )
                print(message)
        return model
    def _minibatch_train_simple_classifier(self, x_train, y_train, model, loss_function, optimizer):
        train_data = torch.utils.data.TensorDataset(x_train, y_train)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=self.batch_size, shuffle=self.shuffle)

        for epoch in range(1, self.n_epochs + 1):
            for data, target in train_loader:
                model.train()
                optimizer.zero_grad()
                outputs = model.forward(data)
                loss = loss_function(outputs, target)
                loss.backward()
                optimizer.step()

            if epoch % 10 == 0 or epoch == self.n_epochs:
                model.eval()
                outputs = model.forward(x_train)
                loss = loss_function(outputs, y_train)
                message = "Train Epoch: {:5d}, Loss: {:15.6f}".format(
                    epoch,
                    loss.item()
                )
                print(message)
        return model
Training
This is normally done on a machine with a GPU.
from sklearn import datasets as sk_datasets

digits = sk_datasets.load_digits(n_class=10, return_X_y=False)
data = digits.data

ae = AutoEncoder(
    encoder_dim=2,
    n_epochs=100,
    batch_size=128,
    shuffle=True,
    use_cuda=True
)

data_fitted = ae.fit_transform(data, data)
Saving the Estimator
I’d like to find a way to have the PyTorch model moved to the CPU before it is saved, without an explicit call. Maybe a function on the AutoEncoder class that gets called as it is pickled? (A sketch of the kind of hook I have in mind follows the snippet below.)
with open("autoencoder.pkl", "wb") as fp:
# ae._model needs to be moved to the CPU here.
# I don't want to have to call ae._model.cpu() explicitly
pickle.dump(ae, fp)
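To make the question concrete, the kind of hook I am imagining is something along these lines: the standard pickle hooks (__getstate__ / __setstate__) added to the AutoEncoder class, stripping the GPU dependency during serialization. This is only a sketch of the idea, not something I know to be the recommended approach:

# Sketch only: possible pickle hooks on the AutoEncoder class above.
# I do not know whether this is the right place to handle devices.
class AutoEncoder(BaseEstimator, TransformerMixin):
    # ... __init__, fit, transform, etc. as defined above ...

    def __getstate__(self):
        # pickle calls this to get the state that will be serialized
        state = self.__dict__.copy()
        if "_model" in state:
            # Module.cpu() moves parameters in place and returns the module,
            # so the live estimator is also moved to the CPU as a side effect
            state["_model"] = state["_model"].cpu()
        state["device"] = torch.device("cpu")  # the pickled copy is CPU-only
        return state

    def __setstate__(self, state):
        # restore attributes; the model comes back on the CPU and can be
        # moved to a GPU again afterwards if one is available
        self.__dict__.update(state)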
Loading
I cannot figure out how to load the estimator on a machine without a GPU if it was saved while the PyTorch model was still on the GPU.
# This gives an error if the model was saved while on the GPU,
# and a GPU is not available when loading.
with open("autoencoder.pkl", "rb") as fp:
    model = pickle.load(fp)

# This gives a similar error. I also would not expect it to work,
# since the pickle file contains an sklearn estimator wrapping a
# PyTorch model, not a plain PyTorch object.
with open("autoencoder.pkl", "rb") as fp:
    torch.load(fp, map_location="cpu")
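One direction that occurs to me is to subclass pickle.Unpickler and redirect torch storages to the CPU, roughly as in the sketch below. It relies on torch.storage._load_from_bytes, which I assume is an internal implementation detail rather than a supported API, so I am not sure this is safe or correct:

# Sketch only: a custom Unpickler that remaps torch storages to the CPU.
# It hooks an internal torch name, so this is an assumption, not a known API.
import io
import pickle
import torch

class CPUUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == "torch.storage" and name == "_load_from_bytes":
            # deserialize the storage bytes on the CPU instead of CUDA
            return lambda b: torch.load(io.BytesIO(b), map_location="cpu")
        return super(CPUUnpickler, self).find_class(module, name)

with open("autoencoder.pkl", "rb") as fp:
    model = CPUUnpickler(fp).load()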