I'm having an issue with my DNN model.
During the training phase, the accuracy is 0.968 and the loss is 0.103, but during the testing phase with model.eval(), the accuracy is 0 and running_corrects is 0.
def train(model, device, train_loader, criterion, optimizer, scheduler, epoch, iter_meter, experiment):
    """Run one training epoch and log/plot the epoch loss and accuracy.

    Args:
        model: network to train (put into train mode here).
        device: torch device the batches are moved to.
        train_loader: DataLoader yielding tensors whose last channel is the label.
        criterion: loss taking (logits, class-index targets).
        optimizer: optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per batch.
        epoch: current epoch number (unused inside; kept for signature compatibility).
        iter_meter: global step counter with .get()/.step().
        experiment: logging backend providing .train() context and .log_metric().
    """
    model.train()
    liveloss = PlotLosses()
    with experiment.train():
        logs = {}
        running_loss = 0.0
        running_corrects = 0
        for batch_idx, _data in enumerate(train_loader):
            # Last channel of each sample holds the label; the rest are features.
            # (The original `_data[:][:,:,:-1]` made a redundant full copy first.)
            features, labels = _data[..., :-1], _data[..., -1]
            # (batch, seq, channels) -> (batch, channels, seq)
            # assumes the model expects Conv1d-style channel-first input — TODO confirm.
            features = features.permute(0, 2, 1)
            features, labels = features.to(device), labels.to(device)
            # Labels appear to be one-hot along dim 1; argmax recovers class indices.
            # Compute once instead of twice per batch.
            targets = torch.max(labels, 1)[1]
            optimizer.zero_grad()
            output = model(features)
            loss = criterion(output, targets)
            loss.backward()
            experiment.log_metric('loss', loss.item(), step=iter_meter.get())
            # get_last_lr() returns a list (one value per param group); log the scalar.
            experiment.log_metric('learning_rate', scheduler.get_last_lr()[0], step=iter_meter.get())
            optimizer.step()
            scheduler.step()
            iter_meter.step()
            _, preds = torch.max(output, 1)
            # Weight the (mean) batch loss by batch size so the epoch average is exact.
            running_loss += loss.detach() * features.size(0)
            running_corrects += torch.sum(preds == targets)
        # Epoch-level metrics, averaged over the full dataset.
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.float() / len(train_loader.dataset)
        logs['log loss'] = epoch_loss.item()
        logs['accuracy'] = epoch_acc.item()
        liveloss.update(logs)
        liveloss.send()
# Global step counter shared across all epochs so logged metrics line up.
iter_meter = IterMeter()
# Epochs are 1-indexed for human-readable logging.
for epoch_idx in range(1, epochs + 1):
    train(model, device, train_loader, criterion, optimizer, scheduler, epoch_idx, iter_meter, experiment)
The evaluation script is:
def test(model, device, tst_loader, criterion, epoch, iter_meter, experiment):
    """Evaluate the model on the test set and log/plot loss and accuracy.

    Runs under model.eval() and torch.no_grad(); mirrors the training-side
    metric computation so train/test numbers are comparable.

    Args:
        model: trained network (put into eval mode here).
        device: torch device the batches are moved to.
        tst_loader: DataLoader yielding tensors whose last channel is the label.
        criterion: loss taking (logits, class-index targets).
        epoch: current epoch number (unused inside; kept for signature compatibility).
        iter_meter: global step counter with .get()/.step().
        experiment: logging backend providing .test() context and .log_metric().
    """
    print('\nevaluating...')
    model.eval()
    # Running mean of per-batch losses (each batch contributes loss/num_batches).
    test_loss = 0
    liveloss = PlotLosses()
    with experiment.test():
        with torch.no_grad():
            logs = {}
            running_loss = 0.0
            running_corrects = 0
            for batch_idx, _data in enumerate(tst_loader):
                # Last channel holds the label; the rest are features.
                # (The original `_data[:][:,:,:-1]` made a redundant full copy first.)
                features, labels = _data[..., :-1], _data[..., -1]
                # (batch, seq, channels) -> (batch, channels, seq)
                features = features.permute(0, 2, 1)
                features, labels = features.to(device), labels.to(device)
                output = model(features)
                # NOTE(review): the reported "accuracy = 0 at eval" symptom is
                # consistent with test labels NOT being one-hot along dim 1
                # (e.g. already class indices) — if so, this argmax yields wrong
                # targets. Verify the test tensor layout matches training.
                targets = torch.max(labels, 1)[1]
                loss = criterion(output, targets)
                test_loss += loss.item() / len(tst_loader)
                experiment.log_metric('loss', loss.item(), step=iter_meter.get())
                iter_meter.step()
                _, preds = torch.max(output, 1)
                # Weight the (mean) batch loss by batch size for an exact epoch average.
                running_loss += loss.detach() * features.size(0)
                running_corrects += torch.sum(preds == targets)
            # Epoch-level metrics, averaged over the full test set.
            epoch_loss = running_loss / len(tst_loader.dataset)
            epoch_acc = running_corrects.float() / len(tst_loader.dataset)
            logs['log loss'] = epoch_loss.item()
            logs['accuracy'] = epoch_acc.item()
            liveloss.update(logs)
            liveloss.send()
# Fresh step counter for the evaluation run's logged metrics.
iter_meter = IterMeter()
# 1-indexed epochs, matching the training driver.
for epoch_idx in range(1, epochs + 1):
    test(model, device, tst_loader, criterion, epoch_idx, iter_meter, experiment)
Is there something wrong with the code?