Different inputs result in the same prediction: unsuccessful training?

I have written two scripts to predict an array from either another array or a matrix, but the loss function does not seem to converge: when I test the trained model with different inputs, I get the same predictions.
It seems I have made the same mistake in both scripts. Could anyone help me?
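
For reference, here is the check that shows the symptom (a minimal sketch; it assumes net is the trained model from the first script, Mtest is the test matrix built there, and torch is already imported):

with torch.no_grad():
    out = net(torch.from_numpy(Mtest).float())
print((out[0] - out[1]).abs().max())   # prints something close to zero, i.e. the predictions coincide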

The code is below:

  1. 1-d to 1-d model
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import sys
import scipy.io as sio

Nt = 1000
Np = 200
Nalp = Np
t = np.linspace(0.8, 1.2, Nt)*150
p = np.linspace(-0.5, 0.5, Np)

# q = theta
q = np.zeros((Nt, Np+1))
M = np.zeros((Nt, Np))
alp = np.random.rand(Nalp)

for i in range(Nt):
    for j in range(Np):
        q[i, j] = 0.5 * t[i] * p[j]**2 + alp[j]
    q[i, Np] = t[i]
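
# The two loops above are equivalent to this vectorized form (a sketch, should give the same q):
# q[:, :Np] = 0.5 * np.outer(t, p**2) + alp
# q[:, Np] = t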

# M1 is gaussian distribution
M10 = 15.5
a1 = 3.0
b1 = 0.2
c1 = 0.5

M20 = 3
a2 = 1.0
b2 = -0.2
c2 = 0.5

M1 = M10*np.exp( -(a1*np.square(p) + b1*abs(p) + c1)  )
M2 = M20*np.exp( -a2*np.square(p) + b2*p + c2)
Merr = np.exp(-np.square(p)*3)
# plt.plot(p, Merr)
# plt.plot(p, M1)
# plt.plot(p, M2)
# plt.show()
# sys.exit()

for it in range(Nt):
    for ip in range(Np):
        M[it,ip] = np.square(M1[ip]) + np.square(M2[ip]) + np.abs(M1[ip]) * np.abs(M2[ip])*np.cos(q[it, ip])
    M[it,:] = M[it,:]/M[it,:].max()# + Merr[ip]
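
# Vectorized equivalent of the loop above (a sketch, should give the same M):
# M = np.square(M1) + np.square(M2) + np.abs(M1)*np.abs(M2)*np.cos(q[:, :Np])
# M = M / M.max(axis=1, keepdims=True)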

# note: the spectrum y = M is the network input and the phase x = q is the target
x = torch.from_numpy(q).float()
y = torch.from_numpy(M).float()# + 0.2 * torch.rand(x.size())

class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden1, n_hidden2, n_output):
        super(Net, self).__init__()
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden1)
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)
        # self.dropped = torch.nn.Dropout(0.5)
        self.predict = torch.nn.Linear(n_hidden2, n_output)

    def forward(self, x):
        # x = F.relu(self.dropped(self.hidden1(x)))      # activation function for hidden layer
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.predict(x)
        return x

net = Net(n_feature=Np, n_hidden1=10*Np, n_hidden2=10*Np, n_output=Np+1)     # define the network
# print(net)  # net architecture
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.8)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss

for epoch in range(10):
    prediction = net(y)
    loss = loss_func(prediction, x)
    # for it in range(Nt):
    #     loss = loss + loss_func(prediction[it, :], x[it, :])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 2 == 0:
        print('epoch=', epoch, 'Loss=', loss.data.numpy())
        print('t=', prediction.detach().numpy()[0:Nt:50, Np])
        sys.stdout.flush()

# print('q=', q)
# print('prediction = ', prediction)
# print('q-prediction =', q[:, Np] - prediction.data.numpy()[:, Np])
# for i in range(5):
# plt.plot(p, q - )

torch.save(net.state_dict(), 'net_params.pkl')      # save only the weights
net.load_state_dict(torch.load('net_params.pkl'))   # reload them (round trip, no effect)


# data_hmr = sio.loadmat(u'exam1s.mat')
# M = data_hmr['inten']
# for i in range(len(M[:, 1])):
#     M[i, :] = M[i, :]/M[i, :].max()

# prediction = net(torch.from_numpy(M).float())

Ntest = 3

t = np.linspace(0.8, 1.2, Ntest)*150
p = np.linspace(-0.5, 0.5, Np)

# q = theta
qtest = np.zeros((Ntest, Np+1))
Mtest = np.zeros((Ntest, Np))
alp = np.random.rand(Nalp)  # note: fresh random alp, different from the training set

for i in range(Ntest):
    for j in range(Np):
        qtest[i, j] = 0.5 * t[i] * p[j]**2 + alp[j]
    qtest[i, Np] = t[i]

for it in range(Ntest):
    for ip in range(Np):
        Mtest[it,ip] = np.square(M1[ip]) + np.square(M2[ip]) + np.abs(M1[ip]) * np.abs(M2[ip])*np.cos(qtest[it, ip])
    Mtest[it,:] = Mtest[it,:]/Mtest[it,:].max()# + noise

prediction = net(torch.from_numpy(Mtest).float())

plt.figure()
for i in range(Ntest):
    plt.subplot(311)
    plt.plot(p, qtest[i,0:Np])
    # print('t0=', qtest[i, Np])
    plt.title(r"correct $\theta$(p)")
    plt.subplot(312)
    plt.plot(p, Mtest[i,:])
    plt.title(r"the spectrum PEMD(p)")
    plt.subplot(313)
    plt.plot(p, prediction.detach().numpy()[i,0:Np])
    plt.title(r"learning result $\theta$(p)")
    # print('t=', prediction.detach().numpy()[i, Np])

plt.tight_layout()
plt.show()
  2. 2-d to 1-d model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as Data

import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from numpy.fft import fft, ifft, fftshift

import pandas as pd

import sys
import scipy.io as sio

def smooth(x, window_len=6, window='hanning'):
    # reflect-pad both ends, convolve with the chosen window, then trim back to len(x)
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        w = getattr(np, window)(window_len)

    y = np.convolve(w/w.sum(), s, mode='valid')
    return y[2:-3]  # note: this trim assumes the default window_len=6
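
# Sanity check (with the default window_len=6, the [2:-3] trim restores the input length):
# assert len(smooth(np.ones(64))) == 64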

class FrogNet:  # unused placeholder
    def __init__(self):
        pass

# Conv output size: N_out = (N_in - N_kernel + 2*p)/s + 1
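# A small helper to verify the shape comments in CNN below (standard conv arithmetic, a sketch):
def conv_out(n_in, n_k, s, p):
    return (n_in - n_k + 2*p)//s + 1
# conv_out(64, 4, 2, 1) == 32; conv_out(32, 2, 2, 1) == 17; conv_out(17, 1, 1, 1) == 19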
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape(1, 64, 64)
            nn.Conv2d(
                1, 32, 4, 2, 1  # N_channel_1, N_channel_out, n_k, s, p
            ),
            nn.ReLU()                # output shape (32, 32, 32): 32 = (64-4+2*1)/2 + 1
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                32, 32, 2, 2, 1
            ),
            nn.ReLU()               # output shape (32, 17, 17): 17 = (32-2+2*1)/2 + 1
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(
                32, 32, 1, 1, 1
            ),
            nn.ReLU()               # output shape (32, 19, 19): 19 = (17-1+2*1)/1 + 1
        )
        self.hidden = nn.Linear(32*19*19, 512)
        self.out = nn.Linear(512, 128)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)
        hidden = self.hidden(x)
        output = self.out(hidden)
        return output
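
# Quick shape check for the CNN (a sketch): a (1, 1, 64, 64) input should map to (1, 128)
# print(CNN()(torch.zeros(1, 1, 64, 64)).shape)   # torch.Size([1, 128])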


# E = torch.ones(2, 2, requires_grad=True)
Nt = 64
Trange = 200. # fs

t = np.linspace(-Trange/2., Trange/2., Nt)
w = np.linspace(-1, 1, Nt)*2*np.pi/Trange
dt = 2*Trange/Nt
dw = 2*np.pi/Trange

Nlearn = 200
images = np.zeros((Nlearn, Nt, Nt))
labels = np.zeros((Nlearn, Nt*2))
Sw = signal.gaussian(Nt, std = 10)

for idx_learn in range(Nlearn):
    qw = (np.random.rand(Nt)-0.5)*20#4*np.pi
    qw = qw * signal.gaussian(Nt, std=6)
    # qw =  fft(ifft(fftshift(qw) * signal.gaussian(Nt, std=8)))

    Ew = np.sqrt(Sw) * np.exp(1j*qw)   # spectral field: Gaussian amplitude, random phase
    Et = fftshift(ifft(Ew))            # corresponding time-domain field

    St = np.abs(Et) * signal.gaussian(Nt, std=8)
    St = St/St.max()
    qt = np.angle(Et)
    qt = smooth(qt)
    # qt = smooth(smooth(smooth(qt)))
    # qt = qt - qt[Nt//2]

    Et = np.sqrt(St) * np.exp(1j*qt)

    # plt.plot(t, St, label="amp")
    # plt.plot(t, qt, label="phase")
    # plt.show()
    # sys.exit()
    # print(len(smooth(Ew)))

    # plt.legend()
    # plt.show()


    delay = t
    Edelay = np.zeros(Nt)

    frog = np.zeros((Nt, Nt))


    for i in range(Nt):
        tau = delay[i]
        if tau < 0:   # negative delay: roll right, zero the wrapped-around samples
            Edelay = np.roll(Et, Nt//2-i)
            Edelay[:Nt//2-i] = 0.

        if tau > 0:   # positive delay: roll left, zero the wrapped-around samples
            Edelay = np.roll(Et, Nt//2-i)
            Edelay[Nt//2-i-1:] = 0.
        frog[i, :] = np.abs(fftshift(fft(Et * Edelay)))**2   # power spectrum of the gated field
    images[idx_learn, :, :] = frog
    labels[idx_learn, :] = np.append(np.real(Et), np.imag(Et))   # label = [Re(Et), Im(Et)]

# w = 1000*w
# plt.imshow(np.transpose(frog), interpolation='bessel', origin='lower', 
#             extent=[t.min(), t.max(), w.min(), w.max()],
#             aspect='auto', cmap='terrain')
# plt.show()

images.tofile("images.bin")
labels.tofile("labels.bin")
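# Note: tofile() writes raw float64 with no shape/dtype header, so reading back needs e.g.
# np.fromfile("images.bin").reshape(Nlearn, Nt, Nt) and np.fromfile("labels.bin").reshape(Nlearn, Nt*2)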
# class MyDataset(Dataset):
#     def __init__(self, images, labels):
#         self.images = images
#         self.labels = labels

#     def __getitem__(self, index):
#         img, target = self.images[index], self.labels[index]
#         return img, target

#     def __len__(self):
#         return len(self.images)

images = torch.from_numpy(images)
labels = torch.from_numpy(labels).type(torch.FloatTensor)
images = torch.unsqueeze(images, dim=1).type(torch.FloatTensor)

cnn = CNN()
LR = 0.01
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.L1Loss()

BATCH_SIZE = 50
dataset = Data.TensorDataset(images, labels)

dataloader = DataLoader(dataset, num_workers=0, batch_size=BATCH_SIZE, shuffle=False)

EPOCH = 50
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(dataloader):
        output = cnn(x)

        loss = loss_func(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 5 == 0:
        print('|Epoch = ', epoch, '|loss=', loss.data.numpy())


torch.save(cnn.state_dict(), 'cnn_params.pkl')     # save only the network parameters (fast, low memory)
cnn.load_state_dict(torch.load('cnn_params.pkl'))  # ('net_params.pkl' from the first script is a different architecture)

test_output = cnn(images[:10])
# pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(test_output[:10], 'prediction')
print(labels[:10].numpy(), 'ground truth')
for i in range(3):
    plt.plot(labels[i].data.numpy()+i*0.5, label='real')
    plt.plot(test_output[i].data.numpy()+i*0.5, label='prediction')
plt.legend()
plt.show()