I’m trying to train an MLP to learn a simple 2D function, such as a Gaussian or a step function, but the loss converges only to a relatively high value and the approximation is almost constant. I have tried tweaking the learning rate, network size, weight initialization, activation functions, etc., without any success.
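One failure mode that produces exactly this symptom (loss plateaus, fit collapses to a constant) is silent shape broadcasting in the loss. A minimal standalone sketch with hypothetical tensors, independent of the script below:

import torch

pred = torch.randn(5, 1)  # model output, shape (N, 1)
target = torch.randn(5)   # labels, shape (N,)

# (N, 1) minus (N,) broadcasts to (N, N): every prediction is compared
# with every target, and that MSE is minimized by a constant output.
print((pred - target).shape)                 # torch.Size([5, 5])
print((pred - target.reshape(-1, 1)).shape)  # torch.Size([5, 1])

The full script, with the target reshaped to match the prediction: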
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import optim, nn
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.n = 16  # hidden width
        self.fc1 = nn.Linear(2, self.n)
        self.fc2 = nn.Linear(self.n, self.n)
        self.fc3 = nn.Linear(self.n, self.n)
        self.fc4 = nn.Linear(self.n, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return self.fc4(x)  # shape (N, 1): note the trailing dimension
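# Shape check with a hypothetical batch: Net()(torch.randn(4, 2)).size()
# is torch.Size([4, 1]), i.e. the output keeps a trailing dimension of 1.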
def loss_fn(z_pred, z):
    # Plain MSE. z_pred and z must have identical shapes: an (N, 1)
    # prediction minus an (N,) target silently broadcasts to (N, N).
    return torch.mean((z_pred - z) ** 2)
def f(x, y):
    # Step target: 1 on the lower-left quadrant of [0, 1]^2, 0 elsewhere.
    return np.logical_and(x < 0.5, y < 0.5).astype(float)
    # return np.exp(-(x**2 + y**2))  # alternative: smooth Gaussian target
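# Quick sanity check of the target, following directly from the
# definition above: f(0.2, 0.3) -> 1.0 (both coordinates below 0.5),
# while f(0.7, 0.3) -> 0.0.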
if __name__ == '__main__':
    n = 50
    learning_rate = 0.001
    epochs = 1500

    # Build an n x n training grid on [0, 1]^2.
    # cartesian_prod(t, t) already has shape (n*n, 2), one (x, y) pair per row.
    t = torch.linspace(0, 1, n)
    X = torch.cartesian_prod(t, t)
    print(X.size())
    x = X[:, 0]
    y = X[:, 1]

    # Evaluate the target with NumPy on the CPU, then reshape to (n*n, 1)
    # so it matches the model output exactly: an (n*n,) target broadcasts
    # against the (n*n, 1) prediction inside the MSE, turning it into an
    # (n*n, n*n) comparison whose minimizer is a near-constant function.
    g = torch.from_numpy(f(x.cpu().numpy(), y.cpu().numpy()))
    g = g.float().reshape(-1, 1).to(device)
    print("x ", x.size(), " y ", y.size(), " g ", g.size())

    Xt, Yt = np.meshgrid(t.cpu(), t.cpu())  # grid kept for plotting

    model = Net()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for e in range(epochs):
        g_pred = model(X)  # shape (n*n, 1), same as g
        loss = loss_fn(g_pred, g)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if e % 100 == 0:
            print(e, loss.item())
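To inspect the fit visually, a plotting sketch (my assumption: this runs after the training loop inside the main block, reusing n, model, X, g, Xt, Yt from above; matplotlib is already imported as plt):

with torch.no_grad():
    z_pred = model(X).reshape(n, n).cpu().numpy()
z_true = g.reshape(n, n).cpu().numpy()

# cartesian_prod varies the first coordinate slowest, while meshgrid
# defaults to 'xy' indexing, so transpose to align the two grids.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.contourf(Xt, Yt, z_true.T)
ax1.set_title("target f(x, y)")
ax2.contourf(Xt, Yt, z_pred.T)
ax2.set_title("MLP approximation")
plt.show()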