Why isn't my model improving?

Hi,
I have a classification task with sequence data and I built a neural network for it, but its loss isn't changing over the epochs…

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

class Main:
    def __init__(self):
        self.batch_size = 10
        self.model = Net()

    def evaluate(self):
        trues = []
        preds = []
        loss_sum = 0
        loss_function = nn.MSELoss()
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(train_loader):
                if y.shape[1] == 1:
                    continue

                p = self.model(x)

                p = torch.argmax(p, dim=1).type(torch.float32)
                y = torch.squeeze(y.type(torch.float32))

                loss_sum += loss_function(p, y)
                trues += y
                preds += p
        print('loss =', loss_sum)
        print('accu =', accuracy_score(trues, preds))

    def train(self):
        loss_function = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters())
        self.model.zero_grad()

        for epoch in range(8):
            los_sum = 0
            for batch_idx, (x, y) in enumerate(train_loader):
                if y.shape[1] == 1:
                    continue

                self.model.zero_grad()

                p = self.model(x)

                p = torch.argmax(p, dim=1).type(torch.float32).requires_grad_(True)
                y = torch.squeeze(y.type(torch.float32))

                loss = loss_function(p, y)

                loss.backward()
                optimizer.step()

                los_sum += loss.item()

            if epoch % 1 == 0:
                print(epoch, 'train loss', los_sum)
                self.evaluate()

where the architecture is the following:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(len(REL_FEATURES), round(len(REL_FEATURES)/2), num_layers=2, batch_first=True)
        self.hidden2temp = nn.Linear(round(len(REL_FEATURES)/2), 11)

    def forward(self, x):
        week_size = x.shape[1]
        # print('1', x.shape)
        lstm_out, _ = self.lstm(x)  # .view(len(sentence), 1, -1)
        # print('2', lstm_out.shape)
        temp_prob = self.hidden2temp(lstm_out.view(week_size, -1))
        # print('3', temp_prob.shape)
        return temp_prob

I get the following output:

0 train loss 1515.3066632635891
loss = tensor(1515.3066)
accu = 0.054489694030383
1 train loss 1515.3066632635891
loss = tensor(1515.3063)
accu = 0.054489694030383
2 train loss 1515.3066632635891
loss = tensor(1515.3064)
accu = 0.054489694030383
3 train loss 1515.3066632635891
loss = tensor(1515.3064)
accu = 0.054489694030383
4 train loss 1515.3066632635891
loss = tensor(1515.3066)
accu = 0.054489694030383

What am I doing wrong?
Thanks.

You are detaching the computation graph here:

p = torch.argmax(p, dim=1).type(torch.float32).requires_grad_(True)

torch.argmax is not differentiable, so you would have to use the raw model outputs (the logits) to calculate the loss.
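For a classification task like yours, one common approach is nn.CrossEntropyLoss applied directly to the logits, with integer class indices as targets. A minimal sketch with placeholder tensors (the 11 classes only mirror your hidden2temp output size; shapes and names are assumptions):

import torch
import torch.nn as nn

num_classes = 11
logits = torch.randn(4, num_classes, requires_grad=True)  # stand-in for the raw model outputs
targets = torch.randint(0, num_classes, (4,))             # integer class indices per sample

loss_function = nn.CrossEntropyLoss()    # expects raw logits, applies log-softmax internally
loss = loss_function(logits, targets)    # differentiable w.r.t. the logits
loss.backward()                          # gradients can flow back into the model

# argmax is only used for metrics, never for the loss
preds = torch.argmax(logits, dim=1)
accuracy = (preds == targets).float().mean()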

Oh, thank you…
What should we use to find the maximum entry?

torch.argmax is the right method to find the index corresponding to the max value. However, since it's not differentiable, you cannot use it to calculate the loss.
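During evaluation (under torch.no_grad()) it's perfectly fine to take the argmax of the logits to get the predicted class; only the loss has to be computed from the logits themselves, as in the sketch above. A small example with a placeholder tensor:

import torch

with torch.no_grad():
    logits = torch.randn(4, 11)          # stand-in for self.model(x)
    preds = torch.argmax(logits, dim=1)  # predicted class index per sample
    print(preds)                         # e.g. tensor([ 3,  7,  0, 10])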