[resolved] Optimization of a logistic regression

EDIT: Resolved; see the resolution at the end of the post.

Hi everyone,
I'm trying to solve a relaxation of the following optimization problem over hyperplanes w: minimize over w the objective |(1/n) * sum_i 1{w.x_i > 0} - a|, i.e. push the fraction of points on the positive side of the hyperplane toward a target level a.

I relax the problem by approximating the indicator with the logistic (sigmoid) loss, as in logistic regression. Note that the gen_synthetic_data(d, n, 'binary') function (imported below) just generates a random n x d data matrix.
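
For concreteness, here is a minimal sketch of the relaxation I have in mind (the helper name relaxed_objective is mine, just for illustration):

import torch

# Smooth surrogate for the objective |(1/n) * sum_i 1{w.x_i > 0} - a|:
# the indicator is replaced by a sigmoid, making it differentiable in w.
def relaxed_objective(w, X, a):
    probs = torch.sigmoid(X @ w)        # soft stand-in for 1{w.x_i > 0}
    return torch.abs(probs.mean() - a)  # relaxed objective value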

However, when I run the routine below, the objective does not decrease; if anything it increases. I'm not sure where the error lies: in the optimization step or in the relaxation.

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from synth_data_class import *  # provides gen_synthetic_data

class Net(nn.Module):
    def __init__(self, input_size):
        super(Net, self).__init__()
        self.linear = nn.Linear(input_size, 1, bias=True)

    # nn.Linear only computes the affine map w.x + b; the two class
    # probabilities are built from its output explicitly below
    def forward(self, x):
        out = self.linear(x)
        p = torch.sigmoid(out)  # equals exp(out) / (1 + exp(out))
        # columns: [P(class 0), P(class 1)], as expected by NLLLoss
        soft_out = torch.cat([p, 1 - p], dim=1)
        return soft_out
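
Side note: taking torch.log of these probabilities afterwards (as the loss below does) is numerically fragile when a probability is near 0. A sketch of a more stable drop-in forward that returns log-probabilities directly (the torch.log in the loss would then be dropped):

import torch.nn.functional as F

# log P(class 0) = logsigmoid(out); log P(class 1) = logsigmoid(-out)
def forward(self, x):
    out = self.linear(x)
    return torch.cat([F.logsigmoid(out), F.logsigmoid(-out)], dim=1)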


# renamed so it does not shadow torch.utils.data.Dataset
class PointsDataset(Dataset):
    def __init__(self, X, y):
        self.data = X
        self.labels = y

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        return {'X': x, 'y': y}

# get data: n points in R^d, all labels fixed to class 1
n = 100
d = 6
data = torch.tensor(np.array(gen_synthetic_data(d, n, 'binary', intercept=False)[0])).float()
labels = torch.ones(n)

dataset = PointsDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
net = Net(d)
a = torch.tensor(.3)
learning_rate = 0.001
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()
n_passes = 1000
for epoch in range(n_passes):
    for batch in dataloader:
        # fraction of the full data set currently predicted as class 1
        outputs = net(data)
        _, predicted = torch.max(outputs.data, 1)
        frac = predicted.float().mean()
        # sign of the subgradient of |frac - a|
        if frac > a:
            sign = 1
        else:
            sign = -1
        print(torch.abs(frac - a))  # current (unrelaxed) objective value
        X, y = batch['X'], batch['y']
        optimizer.zero_grad()
        y_hat = net(X)
        # signed negative log likelihood loss on the batch
        loss = sign * criterion(torch.log(y_hat), y.type(torch.LongTensor))
        loss.backward()
        optimizer.step()
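
For what it's worth, a cleaner alternative might be to optimize the relaxed objective directly and let autograd handle the absolute value, instead of flipping the NLL loss by hand. A minimal sketch, reusing data, a, learning_rate, and n_passes from above:

w_net = nn.Linear(d, 1, bias=True)
opt = torch.optim.SGD(w_net.parameters(), lr=learning_rate)
for _ in range(n_passes):
    opt.zero_grad()
    frac = torch.sigmoid(w_net(data)).mean()  # relaxed fraction on the positive side
    loss = torch.abs(frac - a)                # relaxed objective |frac - a|
    loss.backward()
    opt.step()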

Resolved: the sign in the subgradient of the absolute value was flipped; switching the two branches fixes it. (Minimizing the unsigned NLL term, with every label fixed to 1, pushes the predicted class-1 fraction up, so when that fraction already exceeds a the loss needs a negative sign to push it back down.)

        if frac > a:
            sign = -1
        else:
            sign = 1