These two networks behave differently: the first one works fine, but the second one’s output does not make sense. Am I missing something in the second network? Both have a single hidden layer (10 units in the code below), 2 input features, and a single output.

Network 1

```
class Net(torch.nn.Module):
    """Feed-forward network with one ReLU hidden layer and a linear output.

    Args:
        n_feature: Number of input features per sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Number of values produced per sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)  # output layer

    def forward(self, x):
        """Return the network output for the input tensor ``x``."""
        activated = F.relu(self.hidden(x))
        # No activation on the last layer: the output is a plain linear map.
        return self.predict(activated)
```

Network 2

```
class Net2(torch.nn.Module):
    """Feed-forward network with a configurable stack of ReLU hidden layers.

    Args:
        n_features: Number of input features per sample.
        h_sizes: Non-empty sequence of hidden layer widths. The first entry
            is the width of the layer fed by the input; each following entry
            adds one more hidden layer. Indexing ``h_sizes[0]`` raises
            ``IndexError`` when the sequence is empty.
        out_size: Number of values produced per sample.
    """

    def __init__(self, n_features, h_sizes, out_size):
        super(Net2, self).__init__()
        self.input = torch.nn.Linear(n_features, h_sizes[0])
        # One Linear per consecutive pair of widths; empty when h_sizes has a
        # single entry, in which case the network has exactly one hidden layer.
        self.hidden = torch.nn.ModuleList(
            torch.nn.Linear(width_in, width_out)
            for width_in, width_out in zip(h_sizes, h_sizes[1:])
        )
        # Output layer
        self.out = torch.nn.Linear(h_sizes[-1], out_size)

    def forward(self, x):
        """Return the network output for the input tensor ``x``."""
        x = F.relu(self.input(x))
        for layer in self.hidden:
            x = F.relu(layer(x))
        # No activation on the last layer: the output is a plain linear map.
        return self.out(x)
```

Training code

```
def fit(net, x_train, y_train, epochs=20, optimizer=None, loss_func=None):
    """Train ``net`` on the whole training set and plot progress.

    Every epoch runs one full-batch forward/backward pass. About four times
    during training a scatter plot of predictions against targets is drawn.

    Args:
        net: The ``torch.nn.Module`` to train (modified in place).
        x_train: Inputs, as a NumPy array or a tensor.
        y_train: Targets, as a NumPy array or a tensor.
        epochs: Number of optimisation steps.
        optimizer: Optimizer to use. When omitted, a new Adam optimizer
            (lr=0.02) is built over ``net.parameters()`` so that the network
            being passed in is always the one that gets updated.
        loss_func: Loss callable taking ``(prediction, target)``. Defaults to
            ``torch.nn.MSELoss()``.

    Returns:
        A ``(net, prediction)`` tuple, where ``prediction`` is a NumPy array
        holding the output of the last forward pass (computed before the
        final optimizer step), or a fresh forward pass if ``epochs`` is 0.
    """
    # Convert each array independently; one may already be a tensor.
    if isinstance(x_train, np.ndarray):
        x_train = torch.from_numpy(x_train).float()
    if isinstance(y_train, np.ndarray):
        y_train = torch.from_numpy(y_train).float()
    print(x_train.shape, y_train.shape)

    # An optimizer only updates the parameters it was constructed with, so a
    # shared module-level optimizer silently leaves other networks untrained.
    if optimizer is None:
        optimizer = torch.optim.Adam(net.parameters(), lr=0.02)
    if loss_func is None:
        loss_func = torch.nn.MSELoss()

    # Guard against epochs < 4, where epochs // 4 would be a zero divisor.
    plot_every = max(1, epochs // 4)
    prediction = None

    plt.ion()
    for t in range(epochs):
        prediction = net(x_train)
        # NOTE(review): if y_train is 1-D while the network outputs (N, 1),
        # the loss may broadcast to (N, N) — confirm the target shape.
        loss = loss_func(prediction, y_train)  # (1. nn output, 2. target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if t % plot_every == 0:
            # plot and show learning process
            plt.cla()
            plt.scatter(prediction.detach().numpy(), y_train.detach().numpy())
            plt.text(0.5, 0, 'Loss=%.4f' % loss.item(), fontdict={'size': 20, 'color': 'red'})
            plt.pause(0.1)
    plt.ioff()
    plt.show()

    if prediction is None:
        # No training step ran; still return the network's current output.
        with torch.no_grad():
            prediction = net(x_train)
    return net, prediction.detach().numpy()
def predict(net, x):
    """Run ``net`` on ``x`` and return the result as a NumPy array.

    Args:
        net: A callable model (e.g. a ``torch.nn.Module``).
        x: Inputs, as a NumPy array (converted to a float32 tensor) or a
            tensor (used as is).

    Returns:
        The model output as a NumPy array.
    """
    if isinstance(x, np.ndarray):
        x = torch.from_numpy(x).float()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred = net(x)
    return pred.detach().cpu().numpy()
loss_func = torch.nn.MSELoss()  # mean squared error, for regression

net = Net(n_feature=2, n_hidden=10, n_output=1).float()  # define the network
# An optimizer only updates the parameters it was constructed with, so it has
# to be created after the network exists and rebuilt for every new network.
optimizer = torch.optim.Adam(net.parameters(), lr=0.02)
net, pred = fit(net, x_train, y_train, epochs=2000)

net2 = Net2(2, h_sizes=[10], out_size=1)  # define the network
# NOTE(review): this prints the first network; print(net2) was probably
# intended here — confirm.
print(net)  # net architecture
net2 = net2.float()
# Rebind the optimizer to net2's parameters; reusing the one built for `net`
# would keep stepping `net` and leave net2 at its random initial weights.
optimizer = torch.optim.Adam(net2.parameters(), lr=0.02)
net2, pred = fit(net2, x_train, y_train, epochs=2000)