Hi I am trying to do classification and regression tasks together in multitsak setting. Problem that I am having is that my network does not learn anything. Loss is constant and predicted classes and are the same in every test case (0).
from torch.autograd import Variable
import torch.nn as nn
class MT_LSTM(nn.Module):
def __init__(self):
super(MT_LSTM, self).__init__()
# LSTM layer
self.lstm = nn.LSTM(6, 128)
# Common dense layer
self.common_linear = nn.Linear(128,128)
# Regression spesific layers
self.linearReg = nn.Linear(128, 128)
self.linearReg_layer2 = nn.Linear(128, 1)
# Classification spesific layers
self.linearClas = nn.Linear(128, 1)
self.classify = nn.Sigmoid()
def forward(self, x):
h_0 = Variable(torch.zeros(1, 1, 128))
c_0 = Variable(torch.zeros(1, 1, 128))
out, _ = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
out = out.view(-1, 128) #reshaping the data for Dense layer next
x = self.common_linear(out)
# Classificartion task spesific layers
clas = self.classify(self.linearClas(x))
# Regression task spesific layers
reg = self.linearReg_layer2(self.linearReg(x))
return [clas, reg]
mt_lstm = MT_LSTM()
print(mt_lstm)
mt_lstm.train()
learning_rate = 0.05
clas_criterion = torch.nn.CrossEntropyLoss()
reg_criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(mt_lstm.parameters(), lr=learning_rate)
num_epochs = 10
for epoch in range(num_epochs):
optimizer.zero_grad()
op_c, op_r = mt_lstm(X_train.view(len(X_train),1,-1)) #forward pass
# obtain the loss function
loss_c = clas_criterion(op_c.view(-1), y_train_c.view(-1))
loss_r = reg_criterion(op_r.view(-1), y_train_r.view(-1))
total_loss = loss_c + loss_r
total_loss.backward() #calculates the loss of the loss function
optimizer.step() #improve from loss, i.e backprop
print("Epoch: [{}] loss: [{:.2f}]]".format(epoch+1,loss_c))
mt_lstm.eval()
clas, reg =mt_lstm(X_test.view(len(X_test),1,-1))
predictions = []
for prediction in clas:
if prediction.item() >0.5:
predictions.append(1)
else:
predictions.append(0)
clas_pred = torch.Tensor(predictions)
from sklearn.metrics import accuracy_score
print("MT LSTM classification accuracy:",accuracy_score(y_test_c.reshape(-1),clas_pred.reshape(-1)))
Output:
MT_LSTM(
(lstm): LSTM(6, 128)
(common_linear): Linear(in_features=128, out_features=128, bias=True)
(linearReg): Linear(in_features=128, out_features=128, bias=True)
(linearReg_layer2): Linear(in_features=128, out_features=1, bias=True)
(linearClas): Linear(in_features=128, out_features=1, bias=True)
(classify): Sigmoid()
)
Epoch: [1] loss: [12761.62]]
Epoch: [2] loss: [12761.62]]
Epoch: [3] loss: [12761.62]]
Epoch: [4] loss: [12761.62]]
Epoch: [5] loss: [12761.61]]
Epoch: [6] loss: [12761.61]]
Epoch: [7] loss: [12761.62]]
Epoch: [8] loss: [12761.61]]
Epoch: [9] loss: [12761.62]]
Epoch: [10] loss: [12761.62]]
MT LSTM classification accuracy: 0.4747191011235955
There are 2 classes 1 and 0.