Accuracy decreases after reloading the model

When the accuracy reaches 98%, I save the state_dict. When I then load the state_dict, the accuracy is only 89% on the same dataset (the test set, not the training set). I also find that the outputs differ between the two runs, even though model.eval() is called both during training and after loading the model.
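
In essence, the save/load workflow looks like this (a minimal sketch; the file name is a placeholder):

torch.save(model.state_dict(), 'checkpoint.pth')  # saved once accuracy hits 98%

# ... later, in a separate run ...
model = CNN_V4(height=50, width=1500, class_num=20)
model.load_state_dict(torch.load('checkpoint.pth'))
model.eval()  # disable Dropout and use the BatchNorm running statistics
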
This is my model structure:

import torch
import torch.nn as nn

class CNN_V4(nn.Module):
    def __init__(self, height=50, width=1500, class_num=20):
        super(CNN_V4, self).__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(1, 6, (11, 51), (1, 25)),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 36, (9, 27), stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(36, 64, (3, 3)),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(3, stride=1),
            nn.Dropout(0.5, inplace=True),
        )
        height, width = self.compute_height_width(height=height, width=width)

        self.classifier = nn.Sequential(
            nn.Linear(64 * width * height, 2048),
            nn.Linear(2048, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 1024),
            nn.Dropout(0.1, inplace=True),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 777),
            nn.ReLU(inplace=True),
            nn.Linear(777, class_num)
        )

    def compute_height_width(self, height, width):
        # each step applies floor((in - kernel) / stride) + 1 for the corresponding layer
        height = ((height - 10) - 9) // 2 + 1  # Conv2d(1, 6, (11, 51)) then Conv2d(6, 36, (9, 27), stride=2)
        height = (height - 2) - 2              # Conv2d(36, 64, (3, 3)) then MaxPool2d(3, stride=1)

        width = (width - 51) // 25 + 1         # Conv2d(1, 6, (11, 51), (1, 25))
        width = (width - 27) // 2 + 1          # Conv2d(6, 36, (9, 27), stride=2)
        width = width - 2                      # Conv2d(36, 64, (3, 3))
        width = width - 2                      # MaxPool2d(3, stride=1)
        return height, width

    def forward(self, x):
        x = self.feature(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x
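
As a sanity check, the hard-coded arithmetic in compute_height_width can be verified against an actual forward pass through the feature extractor (a small sketch, using the default 50x1500 input):

import torch

model = CNN_V4(height=50, width=1500, class_num=20)
model.eval()
with torch.no_grad():
    feat = model.feature(torch.randn(1, 1, 50, 1500))
print(feat.shape)                            # torch.Size([1, 64, 12, 12])
print(model.compute_height_width(50, 1500))  # (12, 12)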

This is my training code:
eval_flag = False
for e in range(epoch):
    model.train()
    running_loss = 0
    for step, data_label in enumerate(train_loader, start=0):
        data, labels = data_label
        optimizer.zero_grad()
        outputs = model(data.to(device))
        loss = loss_func(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # accumulate statistics
        running_loss += loss.item()
        # print training progress
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        if loss < 0.5:
            eval_flag = True
        print("\rtrain loss: {:^4.2f}%[{}->{}]{:.6f}".format(int(rate * 100), a, b, loss), end="")
    print()
    if e > 30 or eval_flag:
        acc = 0.0
        result_array_list = []
        model.eval()
        with torch.no_grad():
            for test_data in test_loader:
                d, l = test_data
                outputs = model(d.to(device))

                temp_array = np.array(outputs.cpu()).reshape(20)  # assumes batch_size == 1 and class_num == 20
                result_array_list.append(temp_array)

                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == l.to(device)).sum().item()

            val_acc = acc / val_num
            np.savetxt('models/result_server-{}.txt'.format(e), result_array_list)
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model, 'models/{}-cnnv4-{}-{}-{}.path'.format(train_name_list, height, width, e))
                models.save_checkpoint(best_acc, model, optimizer, e, '{}-cnnv4-{}-{}-{}.path'.format(train_name_list, height, width, e))
            print('[{}/{}] train_loss:{:.5} test_acc:{:.5}'.format(e, epoch, running_loss / (step + 1), val_acc))
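
Note that this loop saves the model in two formats: torch.save(model, ...) pickles the whole module, while models.save_checkpoint is a custom helper whose file layout is not shown. A sketch of how each would typically be restored (file names are placeholders, and the 'model' key is an assumption about the checkpoint layout):

# restore the whole pickled module (the class definition must be importable)
model = torch.load('models/whole_model.path')
model.eval()

# restore from a state_dict-style checkpoint
checkpoint = torch.load('models/checkpoint.path')
model = CNN_V4()
model.load_state_dict(checkpoint['model'])  # 'model' key is assumed
model.eval()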

This is my prediction code:
all_label = []
all_prediction = []
i = 1
j = 0
acc = 0
result_array_list = []

for data_, label in data_loader:
    score = model(data_.to(device))
    temp_array = np.array(score.cpu()).reshape(20)  # assumes batch_size == 1 and class_num == 20
    result_array_list.append(temp_array)
    predict_y = torch.max(score, dim=1)[1]

    predict = torch.softmax(torch.squeeze(score), dim=0)
    predict_cla = torch.argmax(predict).cpu().numpy()
    if predict_cla == label.item():
        j += 1
    acc += (predict_y == label.to(device)).sum().item()
    prediction = torch.argmax(score, dim=1)
    all_label.append(label.cpu().numpy())
    all_prediction.append(prediction.cpu().numpy())
    print('\r {}/{}'.format(i, len(data_loader)), end='')
    i += 1
print()
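
For reference, an evaluation loop is usually set up like this (a small sketch; whether model.eval() and torch.no_grad() are applied in the setup omitted above is not visible in the snippet):

model.eval()  # put Dropout and BatchNorm into inference mode
with torch.no_grad():  # gradients are not needed for prediction
    for data_, label in data_loader:
        score = model(data_.to(device))
        predict_y = torch.max(score, dim=1)[1]
        acc += (predict_y == label.to(device)).sum().item()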

I cannot reproduce the issue using your model and a dummy training script:

# script1.py
import torch
import torch.nn as nn

# Setup (CNN_V4 as defined above)
model = CNN_V4()
data = torch.randn(10, 1, 170, 1000)
target = torch.randint(0, 20, (10,))

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Training
for epoch in range(10):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    print('epoch {}, loss {}'.format(epoch, loss.item()))

# Save reference
model.eval()
output_reference = model(data)
checkpoint = {
    'model': model.state_dict(),
    'data': data,
    'output': output_reference,
}
torch.save(checkpoint, 'tmp.pth')


# script2.py
import torch

# Setup (CNN_V4 as defined above)
model = CNN_V4()
checkpoint = torch.load('tmp.pth')
model.load_state_dict(checkpoint['model'])
data = checkpoint['data']
output_ref = checkpoint['output']

# Compare
model.eval()
output = model(data)
print((output - output_ref).abs().max())
> tensor(0., grad_fn=<MaxBackward1>)

As you can see, the restored model returns the same output as the model in the training script.
Could you narrow down the issue further and post an executable code snippet that reproduces it?
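
To narrow it down, you could also compare every entry of the state_dict (this includes the BatchNorm running statistics, not just the parameters) between the file you saved and the model you use for prediction; a small sketch, reusing the 'tmp.pth' name from my scripts:

reference = torch.load('tmp.pth')['model']  # state_dict saved during training
for name, tensor in model.state_dict().items():
    if not torch.allclose(tensor, reference[name]):
        print('mismatch in', name)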

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. I’ve formatted your code, so that I could copy-paste it. :wink: