I am trying to use the gpytorch library for a regression model where I give x(t-2), x(t-1) as inputs
and the Deep Gaussian process model returns y(t)
at the current time step. Here is part of my code.
To reproduce:
import torch
from torch import nn
from torch.nn import functional as F
import gpytorch
from gpytorch.models.deep_gps.dspp import DSPPLayer, DSPP
from gpytorch.means import ConstantMean, ZeroMean, LinearMean
from gpytorch.kernels import ScaleKernel, SpectralMixtureKernel
from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import GaussianLikelihood
class DGPHiddenLayer(DSPPLayer):
    """One layer of a Deep Sigma-Point Process (DSPP).

    Builds a variational GP layer with a (batched) spectral-mixture kernel.

    Args:
        input_dims: dimensionality of this layer's input features.
        output_dims: number of output GPs; ``None`` means a single,
            un-batched GP (typically the final layer).
        device: torch device on which inducing points are created.
        num_inducing: number of inducing points per output GP.
        inducing_points: optional pre-computed inducing locations; when
            ``None``, locations are drawn from a standard normal.
        mean_type: 'constant', 'zero', or anything else for a linear mean.
        num_mixtures: number of spectral-mixture components.
        num_quad_sites: number of quadrature sites used by the DSPP.
    """

    def __init__(self, input_dims, output_dims, device, num_inducing=128,
                 inducing_points=None, mean_type='constant', num_mixtures=5,
                 num_quad_sites=8):
        # BUG FIX: the original unconditionally overwrote a user-supplied
        # `inducing_points` tensor with random values; only randomize when
        # the caller did not provide one.
        if output_dims is None:
            if inducing_points is None:
                inducing_points = torch.randn(num_inducing, input_dims,
                                              device=device)
            batch_shape = torch.Size([])
        else:
            if inducing_points is None:
                inducing_points = torch.randn(output_dims, num_inducing,
                                              input_dims, device=device)
            batch_shape = torch.Size([output_dims])

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing,
            batch_shape=batch_shape,
        )
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True,
        )
        super().__init__(variational_strategy, input_dims, output_dims,
                         num_quad_sites)

        if mean_type == 'constant':
            self.mean_module = ConstantMean(batch_shape=batch_shape)
        elif mean_type == 'zero':
            self.mean_module = ZeroMean(batch_shape=batch_shape)
        else:
            self.mean_module = LinearMean(input_dims)

        # NOTE(review): SpectralMixtureKernel already carries per-mixture
        # scale weights, so wrapping it in ScaleKernel adds a redundant free
        # outputscale parameter — kept to preserve the original model, but
        # consider dropping the ScaleKernel wrapper.
        self.covar_module = ScaleKernel(
            SpectralMixtureKernel(num_mixtures=num_mixtures,
                                  batch_shape=batch_shape,
                                  ard_num_dims=input_dims),
            batch_shape=batch_shape,
            ard_num_dims=None,
        )

    def forward(self, x):
        """Return the layer's prior ``MultivariateNormal`` at inputs ``x``."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)
class DeepGaussianProcesses(DSPP):
    """Deep Sigma-Point Process: a first GP layer followed by a stack of
    hidden GP layers, with a Gaussian observation likelihood.

    Args:
        input_size: dimensionality of the model input.
        output_size: dimensionality of the model output.
        device: torch device the model is moved to.
        num_inducing: inducing points per layer.
        Q: number of quadrature sites for the DSPP.
        max_cholesky_size: threshold passed to gpytorch's Cholesky setting
            during prediction.
        **kwargs: may contain 'hidden_size' (list of hidden-layer widths)
            and 'mean_type' ('constant', 'zero', or 'linear').
    """

    def __init__(self, input_size, output_size, device, num_inducing=50, Q=8,
                 max_cholesky_size=10000, **kwargs):
        # BUG FIX: the original only set self.mean_type when the key was
        # present in kwargs, raising AttributeError otherwise; default it.
        self.mean_type = kwargs.get('mean_type', 'constant')

        # BUG FIX: copy the list before appending — the original appended
        # output_size to the caller's own list, mutating it in place (and,
        # via DGP's mutable default argument, corrupting later instances).
        # The original called an undefined helper `hidden_size_extract`;
        # a plain kwargs lookup does the extraction here.
        hidden_size = list(kwargs.get('hidden_size', []))
        hidden_size.append(output_size)
        self.hidden_size = hidden_size
        self.max_cholesky_size = max_cholesky_size
        self.device = device

        first_layer = DGPHiddenLayer(
            input_dims=input_size,
            output_dims=self.hidden_size[0],
            device=self.device,
            num_inducing=num_inducing,
            mean_type=self.mean_type,
            num_quad_sites=Q,
        )

        # Hidden layers use a constant mean unless the model-level mean type
        # is something other than 'linear'/'constant' (then zero mean), as in
        # the original branching — the two branches differed only in this.
        hidden_mean = ('constant'
                       if self.mean_type in ('linear', 'constant')
                       else 'zero')
        hidden_layers = nn.ModuleList([
            DGPHiddenLayer(
                input_dims=self.hidden_size[i],
                output_dims=self.hidden_size[i + 1],
                device=self.device,
                num_inducing=num_inducing,
                mean_type=hidden_mean,
                num_quad_sites=Q,
            )
            for i in range(len(self.hidden_size) - 1)
        ])

        super().__init__(Q)
        self.first_layer = first_layer
        self.hidden_layers = hidden_layers
        self.likelihood = GaussianLikelihood()
        self.to(device=self.device)

    def forward(self, inputs):
        """Propagate inputs through the first layer then each hidden layer."""
        out = self.first_layer(inputs)
        for hidden in self.hidden_layers:
            out = hidden(out)
        return out

    def predict(self, x):
        """Return (mean, variance, sample) of the predictive distribution.

        NOTE(review): a DSPP's output distribution carries an extra leading
        quadrature-site batch dimension of size Q; the stock gpytorch DSPP
        example averages `preds.mean` / `preds.variance` over that dimension
        using `self.quad_weights.exp()` before use. The shapes produced here
        (and the broadcast error reported downstream) likely stem from that
        extra dimension — confirm against the gpytorch DSPP example.
        """
        with torch.no_grad(), \
                gpytorch.settings.max_cholesky_size(self.max_cholesky_size):
            mus = []
            variances = []
            samples = []
            preds = self.likelihood(self(x))
            mus.append(preds.mean)
            variances.append(preds.variance)
            samples.append(preds.rsample())
        return (torch.cat(mus, dim=-1),
                torch.cat(variances, dim=-1),
                torch.cat(samples, dim=-1))
class DGP(DeepGaussianProcesses):
    """Convenience wrapper whose input is the flattened lagged latent state
    (latent_size * lagging_size features) and whose output is action_size."""

    def __init__(self, latent_size, action_size, lagging_size, device,
                 hidden_size=None, mean_type='constant'):
        # BUG FIX: the original used a mutable default `hidden_size=[50]`.
        # The parent class appends output_size to this list, so the shared
        # default grew on every instantiation; use None + copy instead.
        hidden_size = [50] if hidden_size is None else list(hidden_size)
        input_size = latent_size * lagging_size
        super().__init__(input_size=input_size, output_size=action_size,
                         device=device, hidden_size=hidden_size,
                         mean_type=mean_type)
Here is where I apply the DGP
class to synthetic data:
# Apply the DGP model to synthetic data.
# BUG FIX: fall back to CPU when CUDA is unavailable instead of hard-coding
# 'cuda', and create the input tensor on the same device as the model — the
# original left `lagging_states` on CPU while the model lived on the GPU,
# which fails before any GP computation runs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
latent_size = 20
action_size = 6
lagging_size = 2
deep_module = DGP(latent_size, action_size, lagging_size, device).to(device=device)
# Input: (batch=1, n=8, features=latent_size * lagging_size = 40).
lagging_states = torch.rand((1, 8, 40), device=device)
a, a_v, a_s = deep_module.predict(lagging_states)
I get this error message, which I don’t know how to fix:
a, a_v, a_s = deep_module.predict(lagging_states)
File "/home/models.py", line 313, in predict
preds = self.likelihood(self(x))
File "/home/lib/python3.8/site-packages/gpytorch/likelihoods/likelihood.py", line 65, in __call__
return self.marginal(input, *args, **kwargs)
File "/home/lib/python3.8/site-packages/gpytorch/likelihoods/gaussian_likelihood.py", line 72, in marginal
full_covar = covar + noise_covar
File "/home/lib/python3.8/site-packages/gpytorch/lazy/lazy_tensor.py", line 2035, in __add__
return AddedDiagLazyTensor(self, other)
File "/home/lib/python3.8/site-packages/gpytorch/lazy/added_diag_lazy_tensor.py", line 32, in __init__
broadcasting._mul_broadcast_shape(lazy_tensors[0].shape, lazy_tensors[1].shape)
File "/home/lib/python3.8/site-packages/gpytorch/utils/broadcasting.py", line 20, in _mul_broadcast_shape
raise RuntimeError("Shapes are not broadcastable for mul operation")
RuntimeError: Shapes are not broadcastable for mul operation
I looked for similar errors and applied the different solutions suggested for them. However, they didn’t work in my case. I would appreciate it if anyone could offer a solution for this error. Many thanks.