My multitask LSTM does not learn. Can someone help?

Hi I am trying to do classification and regression tasks together in multitsak setting. Problem that I am having is that my network does not learn anything. Loss is constant and predicted classes and are the same in every test case (0).

from torch.autograd import Variable
import torch.nn as nn

class MT_LSTM(nn.Module):
    def __init__(self):
        super(MT_LSTM, self).__init__()
        
        # LSTM layer
        self.lstm = nn.LSTM(6, 128)
        # Common dense layer
        self.common_linear = nn.Linear(128,128)
        
        # Regression spesific layers
        self.linearReg = nn.Linear(128, 128)
        self.linearReg_layer2 = nn.Linear(128, 1)
        
        # Classification spesific layers
        self.linearClas = nn.Linear(128, 1)
        self.classify = nn.Sigmoid()
    
    def forward(self, x):
        h_0 = Variable(torch.zeros(1, 1, 128)) 
        c_0 = Variable(torch.zeros(1, 1, 128)) 
        
        out, _ = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        out = out.view(-1, 128) #reshaping the data for Dense layer next
        x = self.common_linear(out)
        
        # Classificartion task spesific layers
        clas = self.classify(self.linearClas(x))
        
        # Regression task spesific layers
        reg = self.linearReg_layer2(self.linearReg(x))
        
        return [clas, reg]

mt_lstm = MT_LSTM()
print(mt_lstm)
mt_lstm.train()
learning_rate = 0.05
clas_criterion = torch.nn.CrossEntropyLoss()
reg_criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(mt_lstm.parameters(), lr=learning_rate) 
num_epochs = 10
for epoch in range(num_epochs):
    
    optimizer.zero_grad()
    op_c, op_r = mt_lstm(X_train.view(len(X_train),1,-1)) #forward pass
 
    # obtain the loss function
    loss_c = clas_criterion(op_c.view(-1), y_train_c.view(-1))
    loss_r = reg_criterion(op_r.view(-1), y_train_r.view(-1))
    total_loss = loss_c + loss_r
  
  
    total_loss.backward() #calculates the loss of the loss function
    optimizer.step() #improve from loss, i.e backprop

    print("Epoch: [{}]  loss: [{:.2f}]]".format(epoch+1,loss_c))


mt_lstm.eval()
clas, reg =mt_lstm(X_test.view(len(X_test),1,-1))

predictions = []
for prediction in clas:
    if prediction.item() >0.5:
        predictions.append(1)
    else:
        predictions.append(0)
clas_pred = torch.Tensor(predictions)

from sklearn.metrics import accuracy_score
print("MT LSTM classification accuracy:",accuracy_score(y_test_c.reshape(-1),clas_pred.reshape(-1)))

Output:

MT_LSTM(
  (lstm): LSTM(6, 128)
  (common_linear): Linear(in_features=128, out_features=128, bias=True)
  (linearReg): Linear(in_features=128, out_features=128, bias=True)
  (linearReg_layer2): Linear(in_features=128, out_features=1, bias=True)
  (linearClas): Linear(in_features=128, out_features=1, bias=True)
  (classify): Sigmoid()
)
Epoch: [1]  loss: [12761.62]]
Epoch: [2]  loss: [12761.62]]
Epoch: [3]  loss: [12761.62]]
Epoch: [4]  loss: [12761.62]]
Epoch: [5]  loss: [12761.61]]
Epoch: [6]  loss: [12761.61]]
Epoch: [7]  loss: [12761.62]]
Epoch: [8]  loss: [12761.61]]
Epoch: [9]  loss: [12761.62]]
Epoch: [10]  loss: [12761.62]]

MT LSTM classification accuracy: 0.4747191011235955

There are 2 classes 1 and 0.

nn.CrossEntropyLoss is used in multi-class classification use cases and expects raw logits while you are applying a sigmoid on the classification output, so remove it.