GPyTorch Shape Mismatch Error

skarah00 · July 31, 2024, 3:33pm

Hello, I am trying to run Gaussian Process Regression with GPyTorch, where I feed images as the input to the model using a CNN. I am following the GPyTorch docs for “PyTorch NN Integration”. I keep getting the error “RuntimeError: Shape mismatch: objects cannot be broadcast to a single shape” when running the evaluation. At first I thought it was because the training and validation sets were of different sizes, but the documentation shows that is not an issue. Here is the setup of my code with example data

train_x = torch.randn(24, 3,224,224).to(device)
train_y = torch.randn(24).to(device)

test_x = torch.randn(26, 3,224,224).to(device)
test_y = torch.randn(26).to(device)

class Baseline_CNN(nn.Module):
    def __init__(self):
        super(Baseline_CNN, self).__init__()
        
        net = models.resnet50(weights="IMAGENET1K_V1")
        
        for param in net.parameters():
            param.requires_grad = False #True=finetune
        
        self.resnet_pretrained = net
        self.fc1 = nn.Linear(self.resnet_pretrained.fc.out_features, 512)
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 250)
        self.fc3 = nn.Linear(250, 2)
        
    def forward(self, image):
        img_features = self.resnet_pretrained(image)
        img_features = torch.flatten(img_features, 1)
        img_features = self.fc1(img_features.float())
        x = self.relu(img_features)
        x = self.dropout(x)
        x = self.fc2(x.float())
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)     #****
        return x
    
feature_reducer = Baseline_CNN().to(device)

class CombinedExtractorandGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(CombinedExtractorandGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2))
        
        self.feature_extractor = feature_reducer
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)
        
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model = CombinedExtractorandGPModel(train_x, train_y, likelihood).to(device)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

likelihood.train()
model.train()


training_iter = 5

def train():
    iterator = tqdm.notebook.tqdm(range(training_iter))
    for i in iterator:
        optimizer.zero_grad()
        output = model(train_x)
#         print(output.shape, train_x.shape)
        loss = -mll(output, train_y)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()
    
%time train()

model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    print(test_x.shape)
    preds = model(test_x)

The training works, as the mll decreases as training continues. Even when I would make the testing set the same size as the training, the testing would work, but preds would be of shape 0. How can I fix this? I have tried different kernels, different output layer sizes for the CNN, do CNN’s not work for GPyTorch integration with the ExactGP module?