I am currently implementing a UCB-HVI (Upper Confidence Bound Hypervolume Improvement) acquisition function for multi-objective Bayesian optimization in BoTorch. Here is the code for the acquisition function:
import torch
from torch import Tensor
from botorch.acquisition.multi_objective.analytic import MultiObjectiveAnalyticAcquisitionFunction
from botorch.acquisition.objective import PosteriorTransform
from botorch.utils.multi_objective.box_decompositions.dominated import DominatedPartitioning
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning
from botorch.utils.multi_objective.pareto import is_non_dominated


class UCBHypervolumeImprovement(MultiObjectiveAnalyticAcquisitionFunction):
    def __init__(
        self,
        model,
        train_Y: Tensor,
        ref_point: Tensor,
        partitioning: NondominatedPartitioning,
        posterior_transform: PosteriorTransform | None = None,
        beta: float = 0.1,
    ) -> None:
        """
        UCB-Hypervolume Improvement acquisition function for multi-objective BO.

        Args:
            model: A fitted multi-output GP model.
            train_Y: An `n x m` tensor of previously observed outcomes.
            ref_point: A `1 x m` reference point tensor (for HV calculation).
            partitioning: BoxDecomposition of the non-dominated space.
            posterior_transform: Optional posterior transform.
            beta: UCB parameter controlling exploration (higher = more exploration).
        """
        super().__init__(model=model, posterior_transform=posterior_transform)
        self.train_Y = train_Y
        self.ref_point = ref_point
        self.partitioning = partitioning
        self.beta = beta
    def forward(self, X: Tensor) -> Tensor:
        """
        Evaluate the UCB-HVI acquisition function at the candidate points X.

        Args:
            X: A `batch_shape x q x d` tensor of candidate points.

        Returns:
            A `batch_shape` tensor of acquisition values.
        """
        posterior = self.model.posterior(X, posterior_transform=self.posterior_transform)
        mean = posterior.mean  # shape: batch_shape x q x m
        std = posterior.variance.clamp_min(1e-9).sqrt()  # avoid sqrt(0)
        # UCB formulation for minimization: lower mean - beta * std
        ucb = mean - self.beta * std  # shape: batch_shape x q x m
        # Assume q=1 for analytic HVI (single candidate point per forward call)
        if ucb.shape[-2] != 1:
            raise NotImplementedError("Only q=1 (single candidate point) is supported.")
        # Remove q-dimension: shape becomes batch_shape x m
        ucb_point = ucb.squeeze(-2)
        # DominatedPartitioning assumes maximization, so negate the reference point
        ref_point = torch.tensor(self.ref_point, dtype=torch.float64)
        ref_point_m = ref_point * -1
        # Compute the hypervolume improvement for each batch element
        batch_hvi = []
        for i in range(ucb_point.shape[0]):
            # Combine the existing observations with the new candidate
            expanded_Y = torch.cat([self.train_Y, ucb_point[i].unsqueeze(0)], dim=0)
            # Filter non-dominated points (minimization), negating the outcomes
            # to match the maximization convention of DominatedPartitioning
            pareto_mask0 = is_non_dominated(self.train_Y, maximize=False)
            pareto_Y0_m = self.train_Y[pareto_mask0] * -1
            pareto_mask = is_non_dominated(expanded_Y, maximize=False)
            pareto_Y_m = expanded_Y[pareto_mask] * -1
            # Hypervolume of the current Pareto front (baseline)
            bd = DominatedPartitioning(ref_point=ref_point_m, Y=pareto_Y0_m)
            hv = bd.compute_hypervolume().item()  # .item() detaches from autograd
            # Hypervolume of the front including the candidate
            bd2 = DominatedPartitioning(ref_point=ref_point_m, Y=pareto_Y_m)
            hv2 = bd2.compute_hypervolume().item()  # .item() detaches from autograd
            hvi = hv2 - hv
            batch_hvi.append(hvi)
        # torch.tensor() on a list of Python floats has no grad_fn
        return torch.tensor(batch_hvi)
When I run this, the error “element 0 of tensors does not require grad and does not have a grad_fn” occurs, and I am trying to fix it. The error does not occur when I use other acquisition functions, so I am trying to modify the acquisition function code. As a first attempt, I removed .item() from hv = bd.compute_hypervolume().item() (and from hv2). Since bd.compute_hypervolume() then returns a 0-dimensional tensor, I replaced the batch_hvi list handling with:
hvi0 = hvi.unsqueeze(0)
if i == 0:
    batch_hvi = hvi0
else:
    batch_hvi = torch.cat((batch_hvi, hvi0))
However, the error shown in the picture occurred.
I would like to know how to fix this new error, or alternatively, another way to fix the original “element 0 of tensors does not require grad and does not have a grad_fn” error.
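For context, this is the direction I am considering: a minimal sketch of a grad-preserving forward(), assuming DominatedPartitioning.compute_hypervolume() itself keeps the autograd graph when Y requires grad (I have not verified this). It drops both .item() calls, hoists the baseline hypervolume out of the loop, and assembles the output with torch.stack instead of torch.tensor:

# Sketch of a grad-preserving forward(); assumes compute_hypervolume()
# propagates gradients from Y (not verified)
def forward(self, X: Tensor) -> Tensor:
    posterior = self.model.posterior(X, posterior_transform=self.posterior_transform)
    ucb = posterior.mean - self.beta * posterior.variance.clamp_min(1e-9).sqrt()
    if ucb.shape[-2] != 1:
        raise NotImplementedError("Only q=1 (single candidate point) is supported.")
    ucb_point = ucb.squeeze(-2)  # batch_shape x m
    ref_point_m = torch.tensor(self.ref_point, dtype=torch.float64) * -1
    # The baseline hypervolume does not depend on the candidate: compute it once
    mask0 = is_non_dominated(self.train_Y, maximize=False)
    bd0 = DominatedPartitioning(ref_point=ref_point_m, Y=self.train_Y[mask0] * -1)
    hv0 = bd0.compute_hypervolume()  # keep as a 0-dim tensor, no .item()
    batch_hvi = []
    for i in range(ucb_point.shape[0]):
        expanded_Y = torch.cat([self.train_Y, ucb_point[i].unsqueeze(0)], dim=0)
        mask = is_non_dominated(expanded_Y, maximize=False)
        bd = DominatedPartitioning(ref_point=ref_point_m, Y=expanded_Y[mask] * -1)
        batch_hvi.append(bd.compute_hypervolume() - hv0)  # stays a tensor
    # torch.stack preserves grad_fn; torch.tensor(list) would discard it
    return torch.stack(batch_hvi)

One caveat even if this works: whenever the UCB point is dominated by the current front, is_non_dominated filters it out and the HVI is exactly zero with zero gradient, which may still give the gradient-based optimizer in optimize_acqf nothing to climb.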
Here is the code I used to test the acquisition function:
import torch
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from botorch.optim.fit import fit_gpytorch_mll_torch
from gpytorch.kernels import MaternKernel
from gpytorch.mlls import ExactMarginalLogLikelihood
from pyDOE import lhs  # Latin hypercube sampling

num = 10
dat = lhs(5, samples=num, criterion='center')  # 10 centered LHS points in 5D
t = torch.tensor(dat, dtype=torch.float64)
train_Y1 = torch.sin(t)
covar_module = MaternKernel(nu=2.5, ard_num_dims=5)
ref_point = [3.0, 3.0, 3.0, 3.0, 3.0]  # reference point
ref_point_ten = torch.tensor(ref_point, dtype=torch.float64)
ref_point_ten2 = ref_point_ten * -1
bounds = torch.stack([torch.zeros(5), torch.ones(5)]).to(torch.double)
iteration = 10
for j in range(iteration):
    model = SingleTaskGP(t, train_Y1, covar_module=covar_module)
    if (j % 10) == 0:
        # Refit the hyperparameters every 10th iteration
        Mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_mll_torch(mll=Mll)
    partition = NondominatedPartitioning(ref_point=ref_point_ten, Y=train_Y1)
    acquisition_ftn = UCBHypervolumeImprovement(
        model=model, train_Y=train_Y1, ref_point=ref_point,
        partitioning=partition, beta=4,
    )
    candidate, acq_value = optimize_acqf(
        acquisition_ftn, bounds=bounds, q=1, num_restarts=16, raw_samples=256
    )
    # Update training points with the new candidate
    t = torch.cat((t, candidate))
    train_y1 = torch.sin(candidate)
    train_Y1 = torch.cat((train_Y1, train_y1))
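For what it's worth, this is the quick check I would use to see where the graph breaks before calling optimize_acqf (a minimal sketch; X_test is just an arbitrary batch of points):

# Autograd sanity check on the acquisition function
X_test = torch.rand(4, 1, 5, dtype=torch.float64, requires_grad=True)
val = acquisition_ftn(X_test)
print(val.grad_fn)  # None means the graph is broken inside forward()

If grad_fn is None, something inside forward() (such as the .item() calls or torch.tensor()) is still detaching the output from the graph.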