I’m trying to train an MLP to learn a simple 2D function, such as a Gaussian or a step function, but the loss converges only to a relatively high value and the approximation is almost constant. I have tried tweaking the learning rate, network size, weight initialization, activation functions, etc., without any success.
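One failure mode that produces exactly this symptom (loss plateaus, fit collapses to a constant) is silent shape broadcasting in the loss. A minimal standalone sketch with hypothetical tensors, independent of the script below:

import torch

pred = torch.randn(5, 1)  # model output, shape (N, 1)
target = torch.randn(5)   # labels, shape (N,)

# (N, 1) minus (N,) broadcasts to (N, N): every prediction is compared
# with every target, and that MSE is minimized by a constant output.
print((pred - target).shape)                 # torch.Size([5, 5])
print((pred - target.reshape(-1, 1)).shape)  # torch.Size([5, 1])

The full script, with the target reshaped to match the prediction: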
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import optim, nn
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.n = 16  # hidden width
        self.fc1 = nn.Linear(2, self.n)
        self.fc2 = nn.Linear(self.n, self.n)
        self.fc3 = nn.Linear(self.n, self.n)
        self.fc4 = nn.Linear(self.n, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return self.fc4(x)  # shape (N, 1): note the trailing dimension
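# Shape check with a hypothetical batch: Net()(torch.randn(4, 2)).size()
# is torch.Size([4, 1]), i.e. the output keeps a trailing dimension of 1.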
def loss_fn(z_pred, z):
    # Plain MSE. z_pred and z must have identical shapes: an (N, 1)
    # prediction minus an (N,) target silently broadcasts to (N, N).
    return torch.mean((z_pred - z) ** 2)
def f(x, y):
    # Step target: 1 on the lower-left quadrant of [0, 1]^2, 0 elsewhere.
    return np.logical_and(x < 0.5, y < 0.5).astype(float)
    # return np.exp(-(x**2 + y**2))  # alternative: smooth Gaussian target
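# Quick sanity check of the target, following directly from the
# definition above: f(0.2, 0.3) -> 1.0 (both coordinates below 0.5),
# while f(0.7, 0.3) -> 0.0.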
if __name__ == '__main__':
    n = 50
    learning_rate = 0.001
    epochs = 1500

    # Build an n x n training grid on [0, 1]^2.
    # cartesian_prod(t, t) already has shape (n*n, 2), one (x, y) pair per row.
    t = torch.linspace(0, 1, n)
    X = torch.cartesian_prod(t, t)
    print(X.size())
    x = X[:, 0]
    y = X[:, 1]

    # Evaluate the target with NumPy on the CPU, then reshape to (n*n, 1)
    # so it matches the model output exactly: an (n*n,) target broadcasts
    # against the (n*n, 1) prediction inside the MSE, turning it into an
    # (n*n, n*n) comparison whose minimizer is a near-constant function.
    g = torch.from_numpy(f(x.cpu().numpy(), y.cpu().numpy()))
    g = g.float().reshape(-1, 1).to(device)
    print("x ", x.size(), " y ", y.size(), " g ", g.size())

    Xt, Yt = np.meshgrid(t.cpu(), t.cpu())  # grid kept for plotting

    model = Net()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for e in range(epochs):
        g_pred = model(X)  # shape (n*n, 1), same as g
        loss = loss_fn(g_pred, g)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if e % 100 == 0:
            print(e, loss.item())
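To inspect the fit visually, a plotting sketch (my assumption: this runs after the training loop inside the main block, reusing n, model, X, g, Xt, Yt from above; matplotlib is already imported as plt):

with torch.no_grad():
    z_pred = model(X).reshape(n, n).cpu().numpy()
z_true = g.reshape(n, n).cpu().numpy()

# cartesian_prod varies the first coordinate slowest, while meshgrid
# defaults to 'xy' indexing, so transpose to align the two grids.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.contourf(Xt, Yt, z_true.T)
ax1.set_title("target f(x, y)")
ax2.contourf(Xt, Yt, z_pred.T)
ax2.set_title("MLP approximation")
plt.show()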