Why isn't my model improving?

Hi,
I have a classification task with sequence data and I built a neural network for it, but its loss isn't changing over the epochs…

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

class Main:
    def __init__(self):
        self.batch_size = 10
        self.model = Net()

    def evaluate(self):
        trues = []
        preds = []
        loss_sum = 0
        loss_function = nn.MSELoss()
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(train_loader):
                if y.shape[1] == 1:
                    continue

                p = self.model(x)

                p = torch.argmax(p, dim=1).type(torch.float32)
                y = torch.squeeze(y.type(torch.float32))

                loss_sum += loss_function(p, y)
                trues += y
                preds += p
        print('loss =', loss_sum)
        print('accu =', accuracy_score(trues, preds))

    def train(self):
        loss_function = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters())
        self.model.zero_grad()

        for epoch in range(8):
            los_sum = 0
            for batch_idx, (x, y) in enumerate(train_loader):
                if y.shape[1] == 1:
                    continue

                self.model.zero_grad()

                p = self.model(x)

                p = torch.argmax(p, dim=1).type(torch.float32).requires_grad_(True)
                y = torch.squeeze(y.type(torch.float32))

                loss = loss_function(p, y)

                loss.backward()
                optimizer.step()

                los_sum += loss.item()

            if epoch % 1 == 0:
                print(epoch, 'train loss', los_sum)
                self.evaluate()

where the architecture is the following:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(len(REL_FEATURES), round(len(REL_FEATURES)/2), num_layers=2, batch_first=True)
        self.hidden2temp = nn.Linear(round(len(REL_FEATURES)/2), 11)

    def forward(self, x):
        week_size = x.shape[1]
        # print('1', x.shape)
        lstm_out, _ = self.lstm(x)  # .view(len(sentence), 1, -1)
        # print('2', lstm_out.shape)
        temp_prob = self.hidden2temp(lstm_out.view(week_size, -1))
        # print('3', temp_prob.shape)
        return temp_prob

I get the following output:

0 train loss 1515.3066632635891
loss = tensor(1515.3066)
accu = 0.054489694030383
1 train loss 1515.3066632635891
loss = tensor(1515.3063)
accu = 0.054489694030383
2 train loss 1515.3066632635891
loss = tensor(1515.3064)
accu = 0.054489694030383
3 train loss 1515.3066632635891
loss = tensor(1515.3064)
accu = 0.054489694030383
4 train loss 1515.3066632635891
loss = tensor(1515.3066)
accu = 0.054489694030383

What am I doing wrong?
Thanks.

You are detaching the computation graph here:

p = torch.argmax(p, dim=1).type(torch.float32).requires_grad_(True)

torch.argmax is not differentiable, so you would have to use the raw model outputs (the logits) to calculate the loss.
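For a classification task like yours, one common approach is nn.CrossEntropyLoss applied directly to the logits, with integer class indices as targets. A minimal sketch with placeholder tensors (the 11 classes only mirror your hidden2temp output size; shapes and names are assumptions):

import torch
import torch.nn as nn

num_classes = 11
logits = torch.randn(4, num_classes, requires_grad=True)  # stand-in for the raw model outputs
targets = torch.randint(0, num_classes, (4,))             # integer class indices per sample

loss_function = nn.CrossEntropyLoss()    # expects raw logits, applies log-softmax internally
loss = loss_function(logits, targets)    # differentiable w.r.t. the logits
loss.backward()                          # gradients can flow back into the model

# argmax is only used for metrics, never for the loss
preds = torch.argmax(logits, dim=1)
accuracy = (preds == targets).float().mean()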

Oh, thank you…
What should we use to find the maximum entry?

torch.argmax is the right method to find the index corresponding to the max value. However, since it's not differentiable, you cannot use it to calculate the loss.
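During evaluation (under torch.no_grad()) it's perfectly fine to take the argmax of the logits to get the predicted class; only the loss has to be computed from the logits themselves, as in the sketch above. A small example with a placeholder tensor:

import torch

with torch.no_grad():
    logits = torch.randn(4, 11)          # stand-in for self.model(x)
    preds = torch.argmax(logits, dim=1)  # predicted class index per sample
    print(preds)                         # e.g. tensor([ 3,  7,  0, 10])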