I am reproducing the Keras code from https://colab.research.google.com/github/yaringal/multi-task-learning-example/blob/master/multi-task-learning-example.ipynb
It has an input X and produces two outputs, Y1 and Y2. It uses L2 loss and Adam for optimization. The weight log_var is learnable, and after training the recovered noise levels should be near 10 and 1. However, my code only gives 8 and 3. What is happening in my code? It looks like the log_var does not update well.
This is my code.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import itertools
def gen_data(N, q=None, d1=None, d2=None):
    """Generate a toy two-task regression dataset.

    Task 1: Y1 = 2*X + 8  + noise with std 10.
    Task 2: Y2 = 3*X + 3  + noise with std 1.

    Args:
        N: number of samples.
        q: input feature dimension; defaults to the module-level ``Q``.
        d1: task-1 output dimension; defaults to the module-level ``D1``.
        d2: task-2 output dimension; defaults to the module-level ``D2``.

    Returns:
        Tuple ``(X, Y1, Y2)`` of numpy arrays.
    """
    # Fall back to the module-level globals the original script relied on,
    # so existing calls gen_data(N) behave exactly as before.
    q = Q if q is None else q
    d1 = D1 if d1 is None else d1
    d2 = D2 if d2 is None else d2
    X = np.random.randn(N, q)
    w1 = 2.
    b1 = 8.
    sigma1 = 1e1  # ground-truth noise std for task 1
    # w1 is a scalar, so X.dot(w1) is elementwise scaling; written as X * w1.
    Y1 = X * w1 + b1 + sigma1 * np.random.randn(N, d1)
    w2 = 3.
    b2 = 3.
    sigma2 = 1e0  # ground-truth noise std for task 2
    Y2 = X * w2 + b2 + sigma2 * np.random.randn(N, d2)
    return X, Y1, Y2
class Net(nn.Module):
    """Shared-trunk network with two task heads.

    A single hidden layer (FC0 + ReLU) feeds two independent linear
    heads (FC1, FC2), one per regression task.
    """

    def __init__(self, nf_in, nf_in_out1, nf_out1, nf_out2):
        super(Net, self).__init__()
        self.nf_out1 = nf_out1
        self.nf_out2 = nf_out2
        # Layer construction order is kept stable so that seeded weight
        # initialization matches the original implementation.
        self.FC0 = nn.Linear(nf_in, nf_in_out1)
        self.FC1 = nn.Linear(nf_in_out1, nf_out1)
        self.FC2 = nn.Linear(nf_in_out1, nf_out2)

    def forward(self, input):
        # Flatten everything after the batch dimension, run the shared
        # trunk, then apply each task head to the same hidden features.
        flat = input.view(input.size(0), -1)
        hidden = F.relu(self.FC0(flat))
        return self.FC1(hidden), self.FC2(hidden)
# Custom loss layer
class CustomMultiLossLayer(nn.Module):
    """Homoscedastic-uncertainty weighting of two MSE task losses.

    Implements loss = exp(-log_var1)*MSE1 + log_var1
                    + exp(-log_var2)*MSE2 + log_var2,
    where each log_var = log(sigma^2) is a learnable scalar per task
    (Kendall, Gal & Cipolla, "Multi-Task Learning Using Uncertainty to
    Weigh Losses"). After training, exp(log_var)**0.5 estimates the
    ground-truth noise std of each task.
    """

    def __init__(self, nb_outputs=2):
        super(CustomMultiLossLayer, self).__init__()
        self.nb_outputs = nb_outputs
        # One learnable log-variance per task, initialized to 0 (sigma = 1).
        self.log_vars1 = torch.nn.Parameter(torch.FloatTensor([0]))
        self.log_vars2 = torch.nn.Parameter(torch.FloatTensor([0]))
        self.mse = nn.MSELoss()

    def forward(self, ys_true1, ys_pred1, ys_true2, ys_pred2):
        loss = torch.exp(-self.log_vars1) * self.mse(ys_pred1, ys_true1) + self.log_vars1 \
             + torch.exp(-self.log_vars2) * self.mse(ys_pred2, ys_true2) + self.log_vars2
        # BUG FIX: the estimated noise std is sigma = exp(log_var) ** 0.5
        # (i.e. exp(log_var / 2)), NOT exp(log_var ** 0.5). The old print
        # made correctly-learned values (~10 and ~1) display as ~8 and ~3 —
        # log_var was updating fine; only the diagnostic was wrong.
        # .detach() replaces the deprecated .data access.
        print(torch.exp(self.log_vars1.detach()) ** 0.5,
              torch.exp(self.log_vars2.detach()) ** 0.5,
              loss.item(), self.log_vars2.item())
        return loss
# ---- Experiment configuration ----
N = 100
nb_epoch = 2000
batch_size = 20
nb_features = 1024
Q = 1
D1 = 1  # first output
D2 = 1  # second output

X, Y1, Y2 = gen_data(N)
net = Net(Q, nb_features, D1, D2)
multi_loss = CustomMultiLossLayer()
# Optimize the network weights AND the learnable log-variances together,
# otherwise the uncertainty weights would never update.
optimizer = optim.Adam(itertools.chain(net.parameters(), multi_loss.parameters()))

# NOTE: indentation of the loop bodies was lost in the original paste and has
# been restored here; the statements themselves are unchanged in order.
for epoch in range(nb_epoch):
    # Reshuffle X, Y1, Y2 in unison each epoch.
    XY = list(zip(X, Y1, Y2))
    np.random.shuffle(XY)
    X, Y1, Y2 = zip(*XY)
    X = np.array(X)
    Y1 = np.array(Y1)
    Y2 = np.array(Y2)
    # 5 mini-batches of size 20 cover all N = 100 samples
    # (5 == N // batch_size for this configuration).
    for i in range(5):
        xb = X[i * batch_size:batch_size * (i + 1)]
        y1 = Y1[i * batch_size:batch_size * (i + 1)]
        y2 = Y2[i * batch_size:batch_size * (i + 1)]
        xb = torch.from_numpy(xb).float()
        y1 = torch.from_numpy(y1).float()
        y2 = torch.from_numpy(y2).float()
        y_pred1, y_pred2 = net(xb)
        optimizer.zero_grad()
        loss = multi_loss(y1, y_pred1, y2, y_pred2)
        loss.backward()
        optimizer.step()
This is the Google Colab link: https://colab.research.google.com/drive/1_zsmQguerz0iy0J9Uu2Cs7oEHhj0QoXH