Implementation of Constrained Multi-Objective Optimization for material discovery using qNEHVI

I am working on a material discovery problem where I am using qNEHVI for constrained multi-objective optimization (two objectives and one constraint), with a batch size of 3. I am a beginner with both PyTorch and BoTorch, so I hope I can get help in solving my issue.
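Concretely, each experiment gives me two objective values (both to be maximized) and one additional measured outcome that has to stay at or below 6.0. My intent is to model that outcome with its own GP, append it as the third output of a ModelListGP, and pass the constraint as a callable on the posterior samples. A stripped-down sketch of the call I am trying to make (the model names here are placeholders; my full code follows below, and I am not certain this is the right way to pass the constraint):

policy = qNoisyExpectedHypervolumeImprovement(
    model=ModelListGP(objective_gp_1, objective_gp_2, constraint_gp),  # placeholders for my fitted GPs
    ref_point=ref_point,                                    # one entry per objective
    X_baseline=train_x,
    constraints=[lambda samples: samples[..., 2] <= 6.0],   # third model output is the constraint
)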

Here is my code:

import pandas as pd
import numpy as np
import torch
import gpytorch
import botorch
import matplotlib.pyplot as plt
plt.style.use("seaborn-v0_8-dark")
plt.rcParams["figure.figsize"] = (8, 6)

from tqdm.notebook import tqdm
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.acquisition.multi_objective.monte_carlo import qNoisyExpectedHypervolumeImprovement
from botorch.acquisition.multi_objective.monte_carlo import qExpectedHypervolumeImprovement

from botorch.acquisition.analytic import ExpectedImprovement

from botorch.utils.multi_objective.pareto import is_non_dominated
from botorch.utils.multi_objective.box_decompositions.dominated import DominatedPartitioning
from botorch.utils.multi_objective.box_decompositions.non_dominated import FastNondominatedPartitioning
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.optim.optimize import optimize_acqf

import warnings

train_x = torch.tensor([[3., 0., 0.], [0., 0., 0.], [1., 0., 0.]]).type(torch.float64)
train_y1 = torch.tensor([17189., 10068., 11139.]).to(train_x.dtype)
train_y2 = torch.tensor([77., 4., 17.]).to(train_x.dtype)

# combine objectives 1 & 2

train_Y = torch.vstack([train_y1.flatten(), train_y2.flatten()]).transpose(-1, -2)

constraint_Y = torch.tensor([2.5, 0., 0.]).to(train_x.dtype).unsqueeze(-1)

# normalize train_Y and the constraint

train_y = (train_Y - train_Y.mean()) / train_Y.std()
constraint = (constraint_Y - constraint_Y.mean()) / constraint_Y.std()

class GPModel(gpytorch.models.ExactGP, botorch.models.gpytorch.GPyTorchModel):
    _num_outputs = 1

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=train_x.shape[1])
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def fit_gp_model(train_x, train_y, num_train_iters=500):
    # declare the GP with a small initial noise level
    noise = 1e-4

    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPModel(train_x, train_y, likelihood)
    model.likelihood.noise = noise

    # fit the GP hyperparameters by maximizing the marginal log-likelihood
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    model.train()
    likelihood.train()

    for i in range(num_train_iters):
        optimizer.zero_grad()

        output = model(train_x)
        loss = -mll(output, train_y)

        loss.backward()
        optimizer.step()

    model.eval()
    likelihood.eval()

    return model, likelihood

ref_point = torch.tensor([train_y[:, 0].min(), train_y[:, 1].min()])
bounds = torch.tensor([[-10., -10., -10.], [10., 10., 10.]])

num_queries = 50
num_repeats = 10
batch_size = 3

hypervolumes = torch.zeros((num_repeats, num_queries))

for trial in range(num_repeats):
    print("trial", trial)

    torch.manual_seed(trial)

    for i in tqdm(range(num_queries)):
        dominated_part = DominatedPartitioning(ref_point, train_y)
        hypervolumes[trial, i] = dominated_part.compute_hypervolume().item()

        model1, likelihood1 = fit_gp_model(train_x, train_y[:, 0].squeeze(-1))
        model2, likelihood2 = fit_gp_model(train_x, train_y[:, 1].squeeze(-1))

        # add the constraint
        constraint_model, constraint_likelihood = fit_gp_model(train_x, constraint_Y.squeeze(-1))

        #if strategy == qNEHVI:

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            policy = qNoisyExpectedHypervolumeImprovement(
                model=ModelListGP(model1, model2, constraint_model),
                ref_point=ref_point,
                X_baseline=train_x,
                partitioning=FastNondominatedPartitioning(ref_point, train_y),
                constraints=[lambda samples: samples[..., 2] <= 6.0])

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)

            next_x, acq_val = optimize_acqf(
                acq_function=policy,
                bounds=bounds,
                q=batch_size,
                num_restarts=100,
                raw_samples=500)

        next_y = train_y(next_x)
        next_constraint = constraint_Y(next_x)

        train_x = torch.cat([train_x, next_x])
        train_Y = torch.cat([train_y, next_y])
        constraint = torch.cat([constraint_Y, next_constraint])

The error is raised when the qNoisyExpectedHypervolumeImprovement acquisition function is constructed. Here is the stack trace:

RuntimeError Traceback (most recent call last)
Cell In[10], line 24
21 with warnings.catch_warnings():
22 warnings.simplefilter("ignore")
---> 24 policy = qNoisyExpectedHypervolumeImprovement(
25 model=ModelListGP(model1, model2, constraint_model),
26 ref_point=ref_point,
27 X_baseline = train_x,
28 partitioning=FastNondominatedPartitioning(ref_point, train_y),
29 constraints=[lambda samples: samples[..., 2] <= 6.0])
31 with warnings.catch_warnings():
32 warnings.filterwarnings("ignore", category=RuntimeWarning)

File ~\Anaconda3\envs\Project_Battery\lib\site-packages\botorch\acquisition\multi_objective\monte_carlo.py:515, in qNoisyExpectedHypervolumeImprovement.__init__(self, model, ref_point, X_baseline, sampler, objective, constraints, X_pending, eta, prune_baseline, alpha, cache_pending, max_iep, incremental_nehvi, cache_root, **kwargs)
510 # In the case that X_pending is not None, but there are fewer than
511 # max_iep pending points, the box decompositions are not performed in
512 # set_X_pending. Therefore, we need to perform a box decomposition over
513 # f(X_baseline) here.
514 if X_pending is None or X_pending.shape[-2] <= self._max_iep:
--> 515 self._set_cell_bounds(num_new_points=X_baseline.shape[0])
516 # Set q_in=-1 to so that self.sampler is updated at the next forward call.
517 self.q_in = -1

File ~\Anaconda3\envs\Project_Battery\lib\site-packages\botorch\acquisition\multi_objective\monte_carlo.py:625, in qNoisyExpectedHypervolumeImprovement._set_cell_bounds(self, num_new_points)
622 self.partitioning = BoxDecompositionList(*partitionings)
623 else:
624 # use batched partitioning
--> 625 obj = _pad_batch_pareto_frontier(
626 Y=obj,
627 ref_point=self.ref_point.unsqueeze(0).expand(
628 obj.shape[0], self.ref_point.shape[-1]
629 ),
630 feasibility_mask=feas,
631 )
632 self.partitioning = self.p_class(
633 ref_point=self.ref_point, Y=obj, **self.p_kwargs
634 )
635 cell_bounds = self.partitioning.get_hypercell_bounds().to(self.ref_point)

File ~\Anaconda3\envs\Project_Battery\lib\site-packages\botorch\utils\multi_objective\box_decompositions\utils.py:73, in _pad_batch_pareto_frontier(Y, ref_point, is_pareto, feasibility_mask)
66 raise UnsupportedError(
67 "_pad_batch_pareto_frontier only supports a single "
68 f"batch dimension, but got {len(batch_shape)} "
69 "batch dimensions."
70 )
71 if feasibility_mask is not None:
72 # set infeasible points to be the reference point (corresponding to the batch)
---> 73 Y = torch.where(feasibility_mask.unsqueeze(-1), Y, ref_point)
74 if not is_pareto:
75 pareto_mask = is_non_dominated(Y)

RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 2
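From the last line of the trace, my guess is that the joint posterior of ModelListGP(model1, model2, constraint_model) has three outputs while ref_point only has two entries, so the constraint output seems to end up being treated as a third objective. The signature in the trace suggests there is an objective argument for selecting which outputs are the objectives, and as I read the docstring, constraint callables should return negative values when feasible, so something like the sketch below is what I would try next. I also dropped the partitioning argument, since qNoisyExpectedHypervolumeImprovement does not appear to accept one (unlike qEHVI). I have not verified any of this, so please correct me if it is wrong:

from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective

# tentative, untested change: outputs 0 and 1 are the objectives, output 2 is the
# constraint, and feasibility is expressed as "constraint value - 6.0 <= 0"
policy = qNoisyExpectedHypervolumeImprovement(
    model=ModelListGP(model1, model2, constraint_model),
    ref_point=ref_point,  # two entries, one per objective
    X_baseline=train_x,
    objective=IdentityMCMultiOutputObjective(outcomes=[0, 1]),
    constraints=[lambda samples: samples[..., 2] - 6.0],
)

Is this the right way to set up the constraint with qNEHVI, or should the constraint model not be part of the ModelListGP at all?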