Gpytorch.mlls error when computing loss.backward()

Hi everybody. I’m trying to use Gaussian process regression for noisy function approximation. I’ve defined my model, but I get an error when computing the gradient of the loss function:
“RuntimeError: grad can be implicitly created only for scalar outputs”
In fact, the loss that my model computes has the following shape (I printed it):
shape loss torch.Size([265])
tensor([0.7655, 0.7654, 0.7625, 0.7626, 0.7651, 0.7622, 0.7654, 0.7654, 0.7650,
        ...
        0.7657, 0.7647, 0.7630, 0.7652], grad_fn=<...>)
<class 'torch.Tensor'>

I have checked, and it seems to me that I have used the same lines with the right arguments as suggested in the GPyTorch tutorials; see gpytorch.mlls — GPyTorch 1.6.0 documentation, for instance.

Here I paste my code; I hope you can help me!

import typing

import numpy as np
import torch
import gpytorch


class Model(object):

    def __init__(self):
        self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
        self.model = None
        self.rng = np.random.default_rng(seed=0)  # it was already present

    def predict(self, x: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        :param x: 1d NumPy float array of shape (NUM_SAMPLES, 1)
        :return:
            Tuple of three 1d NumPy float arrays, each of shape (NUM_SAMPLES,),
            containing your predictions, the GP posterior mean, and the GP posterior stddev (in that order)
        """

        # GP to estimate the posterior mean and stddev
        gp_mean = np.zeros(x.shape[0], dtype=float)
        gp_std = np.zeros(x.shape[0], dtype=float)

        x = torch.Tensor(x)

        # Turn the model into evaluation mode
        self.model.eval()
        self.likelihood.eval()

        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            # Obtain the predictive mean and standard deviation
            f_preds = self.model(x)
            print(type(f_preds))
            gp_mean = f_preds.mean.numpy()
            gp_std = f_preds.variance.sqrt().numpy()


        # GP posterior to form your predictions 
        predictions = gp_mean
        
        return predictions, gp_mean, gp_std

    def fit_model(self, train_x: np.ndarray, train_y: np.ndarray):
        """
        Fit your model on the given training data.
        :param train_x: Training features as a 1d NumPy float array of shape (NUM_SAMPLES, 1) 
        :param train_y: Training response as a 1d NumPy float array of shape (NUM_SAMPLES,1)
        """

        # Convert the training data to tensors
        train_x = torch.Tensor(train_x)
        train_y = torch.Tensor(train_y)

        self.model = ExactGPModel(train_x, train_y, self.likelihood)

        self.model.train()
        self.likelihood.train()

        # Use the Adagrad optimizer on the model's parameters
        optimizer = torch.optim.Adagrad(
            [{'params': self.model.parameters()}], lr=0.1)
        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        
        training_iter = 1200  # number of training iterations

        for i in range(training_iter):
            # Zero gradients from previous iteration
            optimizer.zero_grad()
            # Output from model
            output = self.model(train_x)
            #output = output.reshape(output.size(0), -1)
            # Calc loss and backprop gradients
            #print('shape output', output.shape)
            loss = -mll(output, train_y)
            print('shape loss', loss.shape)
            print(loss)
            print(type(loss))
            loss.backward()  # TODO: the error is raised here!
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, training_iter, loss.item(),
                self.model.covar_module.base_kernel.lengthscale.item(),
                self.model.likelihood.noise.item()))
            optimizer.step()


# Auxiliary class
class ExactGPModel(gpytorch.models.ExactGP):
    # every ExactGPModel instance inherits the modules of gpytorch.models.ExactGP

    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ZeroMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=3 / 2))
        

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

As the error suggests, you would either have to reduce the loss first, e.g. via loss.mean().backward(), or you would need to pass the gradient explicitly to the non-scalar loss tensor, e.g. via loss.backward(torch.ones_like(loss)).
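For example, a minimal sketch of both options using the variable names from your training loop:

# Option 1: reduce the non-scalar loss to a scalar before backprop
loss = -mll(output, train_y)
loss.mean().backward()

# Option 2: keep the non-scalar loss and pass an explicit gradient
loss = -mll(output, train_y)
loss.backward(torch.ones_like(loss))

Whether the mean is the right reduction depends on what the batch dimension represents for your model.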

Thanks, I already tried with the mean and it's a possible solution; I just don't really understand why the loss isn't a scalar itself, as it's supposed to be.

I’m not familiar enough with this loss function, but based on the docs, the non-scalar output is expected:

Exact MLL. Output shape corresponds to batch shape of the model/input data.
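In your case the batch dimension most likely comes from train_y: the docstring in fit_model declares it with shape (NUM_SAMPLES, 1), while a single (non-batch) ExactGP expects 1d targets of shape (NUM_SAMPLES,). A trailing dimension of size 1 gets broadcast by the MLL and produces one value per sample, which would match the torch.Size([265]) you printed. A minimal sketch of that check (assuming your train_y really is 2d; flattening it before building the model should give a scalar loss again):

# inside fit_model, before creating the model:
print(train_y.shape)            # e.g. torch.Size([265, 1]) -> interpreted as a batch
train_y = train_y.squeeze(-1)   # torch.Size([265]) -> single, non-batch GP

self.model = ExactGPModel(train_x, train_y, self.likelihood)
self.model.train()
self.likelihood.train()

mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
loss = -mll(self.model(train_x), train_y)
print(loss.shape)               # torch.Size([]) -> scalar, so loss.backward() works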
