# Does my network think that function f(x) = 0 is the best mapping function for my data?

I define a simple network and train it.

``````import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import random_split, Dataset, DataLoader

class BaseNet(nn.Module):
def __init__(self):
super().__init__()
self.net = nn.Sequential(
nn.BatchNorm1d(num_features=1, momentum=0.1, eps=2e-5),
nn.Conv1d(in_channels=1, out_channels=4, kernel_size=1, bias=False),
nn.BatchNorm1d(num_features=4, momentum=0.1, eps=2e-5),
nn.PReLU(num_parameters=4),
nn.Conv1d(in_channels=4, out_channels=16, kernel_size=3, bias=False, padding=1),
nn.BatchNorm1d(num_features=16, momentum=0.1, eps=2e-5),
nn.PReLU(num_parameters=16),
nn.Conv1d(in_channels=16, out_channels=4, kernel_size=3, bias=False, padding=1),
nn.BatchNorm1d(num_features=4, momentum=0.1, eps=2e-5),
nn.PReLU(num_parameters=4),
nn.Conv1d(in_channels=4, out_channels=1, kernel_size=1, bias=False),
)

def forward(self, x):
return self.net(x)

class DataSet(Dataset):
def __init__(self, x, y):
super().__init__()
self.x = torch.tensor(x).float()
self.y = torch.tensor(y).float()

def __len__(self):
return self.x.shape[0]

def __getitem__(self, item):
return self.x[item], self.y[item]

def train(train_data, val_data):
def ridiculous_val(n, v):
n = n.eval()
m = list()
for a, b in v:
p = n(a.cuda())
m.append(F.l1_loss(p, b.cuda()).item())
return np.mean(m)

net = BaseNet()
net = net.cuda()
opt = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.9)
scheduler = MultiStepLR(opt, [500, 800, 1100, 1400, 1700], gamma=0.1)
loss = nn.L1Loss()
min_loss = np.float("inf")
n_batch = 0
net = net.train()
for epoch in range(0, 99999):
for x, y in train_data:
x, y = x.cuda(), y.cuda()

pred = net(x)
loss_val = loss(y, pred)
loss_val.backward()
opt.step()
print("[{}],  {}".format(epoch, loss_val.item()))
n_batch += 1

if n_batch % 100 == 0:
_loss = ridiculous_val(net, val_data)
print("min ", _loss, min_loss)
if _loss < min_loss:
min_loss = _loss
print("saving...", min_loss)
torch.save(net.state_dict(), "net.pth")
net = net.train()
scheduler.step(epoch)

def val(some_data):
net = BaseNet()
net = net.cuda()
pred = np.empty(some_data.shape)
for i, x in enumerate(some_data):
pred[i, ...] = net(torch.from_numpy(x[None, :, :]).float().cuda()).cpu().detach().numpy().squeeze()
return pred

if __name__ == '__main__':
dst = np.load("diff.npy")  # shape (12000, 512)
src = np.load("src.npy")  # shape (12000, 512)
src, dst = src[:, None, :], dst[:, None, :]

data = DataSet(src, dst)
train_len, val_len = int(len(data) * 0.5), int(len(data) * 0.2)
test_len = len(data) - train_len - val_len
train_set, val_set, test_set = random_split(data, [train_len, val_len, test_len])
train_set = DataLoader(train_set, batch_size=1024, shuffle=True, pin_memory=True, drop_last=False)
val_set = DataLoader(val_set, batch_size=1024, shuffle=False, pin_memory=True, drop_last=False)

train(train_set, val_set)
``````

I observed that
epoch 33
`[param for param in net.parameters()][2].abs().max()` is 1e-6
`[param for param in net.parameters()][2].grad.abs().max()` is 7e-6.
`loss` is 0.263

epoch 67
`[param for param in net.parameters()][2].abs().max()` is 6e-11
`[param for param in net.parameters()][2].grad.abs().max()` is 2e-11
`loss` is 0.264

epoch 134
`[param for param in net.parameters()][2].abs().max()` is 3e-20
`[param for param in net.parameters()][2].grad.abs().max()` is 2e-20
`loss` is 0.264

After epoch 324, both of parameters and grad are 0, loss is 0.263.

Does it mean that this network think that function f(x) = 0 is the best mapping function for my data?
Or I do something wrong?

Here are two examples of my data, specifically, they are the output of another image encode network.

Maybe this is a stupid question, but I am really looking forward to getting some suggestions, thanks.