Performing evaluation on the test set

I have implemented the evaluation of the test set as follows:

n_epochs = 1000
batch_size = 32

for epoch in range(n_epochs):

    loss_train = []  # collect the per-batch training losses of this epoch
    permutation1 = torch.randperm(trainX.size(0))
    for i in range(0, trainX.size(0), batch_size):

        optimizer.zero_grad()
        indices1 = permutation1[i:i+batch_size]
        batch_x_train, batch_y_train = trainX[indices1], trainY[indices1]

        model.train()
        outputs = model(batch_x_train)
        train_loss = criterion(outputs, batch_y_train)
        train_loss.backward()
        optimizer.step()
        loss_train.append(train_loss.item())

    model.eval()
    y_pred = model(valX)
    val_loss = criterion(y_pred, valY)

    avg_train_loss = sum(loss_train) / len(loss_train)
    print('epoch {}, train loss {}, val loss {}'.format(epoch, avg_train_loss, val_loss))

model.eval()
y_pred = model(testX)
test_loss = criterion(y_pred, testY) 
print('test loss is {}'.format(test_loss))

Is this the correct way to evaluate the model on the test set? Also, where and how should I save the model in this case (torch.save() or model.state_dict()) if, in the future, all I want to do is load the model and use it on the test set?

Assuming valX is a tensor containing the complete validation data, this approach is generally correct, but you might of course run out of memory if this tensor is too large.
The usual approach is to wrap the data in a Dataset and DataLoader and compute the predictions for each batch. The data loading tutorial gives you some information on how to create a Dataset and DataLoader.

Also, to save memory during validation and testing, you could wrap the validation and test code in a with torch.no_grad() block.
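
For example, a minimal sketch combining both suggestions, assuming valX and valY are tensors and criterion uses its default mean reduction (variable names reused from your code):

from torch.utils.data import TensorDataset, DataLoader

val_loader = DataLoader(TensorDataset(valX, valY), batch_size=32)

model.eval()
val_loss = 0.0
with torch.no_grad():
    for batch_x, batch_y in val_loader:
        preds = model(batch_x)
        # accumulate the sum of per-sample losses; criterion averages over the batch by default
        val_loss += criterion(preds, batch_y).item() * batch_x.size(0)
val_loss /= len(val_loader.dataset)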

Do you mean to say that for the validation and test sets the code should be:

with torch.no_grad():
    model.eval()
    y_pred = model(valX)
    val_loss = criterion(y_pred, valY)

and

with torch.no_grad():
    model.eval()
    y_pred = model(testX)
    test_loss = criterion(y_pred, testY)

Also, how about the answer to the second part of the question where I ask about the best way to save the model?

The usage of no_grad() and model.eval() is correct, and if you are not running out of memory using the complete datasets, your approach should work.

To store the model, you should save the state_dict() as described here.
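
As a rough sketch (the file name and MyModel are placeholders for your own setup):

torch.save(model.state_dict(), 'model.pth')   # save only the parameters, not the pickled module

# later: recreate the model with the same constructor arguments, then load the weights
model = MyModel()                              # placeholder for your model class
model.load_state_dict(torch.load('model.pth'))
model.eval()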


hi @ptrblck, I have an encoder-decoder network where I am adding a softmax layer at the encoder. The training loss converges nicely, but when I run the evaluation on the same training data, it gives very poor results. Could you please check whether the test evaluation part is correct?

def forward(self, x):

    z = self.encode(x)
    logits = self.classifier(z)
    reconstruction = self.decode(z) 
    return logits, reconstruction

from torch.utils.data import Dataset, DataLoader

class DataBuilder(Dataset):

    def __init__(self, path):
        #self.x, self.standardizer, self.wine = load_data(DATA_PATH)
        self.x, self.y = load_data(DATA_PATH)
        self.len = self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.len

data_set=DataBuilder(DATA_PATH)

trainloader=DataLoader(dataset=data_set,batch_size=1)

accs = []

with torch.no_grad():

    for batch_idx, data in enumerate(trainloader):

        x_data = data[0]
        y_data = data[1]

        recon_batch, logits_batch = model(x_data)
        acc = (logits_batch.round() == y_data).float().mean()
        accs.append(acc)

print("%.2f%% (+/- %.2f%%)" % (np.mean(accs)*100, np.std(accs)*100))

I’m not familiar with your use case and thus don’t know if the accuracy calculation makes sense or not. Check the raw outputs before rounding and compare some of these outputs manually to their targets to understand what your model is trying to predict.
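
As a hedged example of such a manual check for a 3-class output, assuming forward returns (logits, reconstruction) as in the snippet above and y_data holds class indices, you could compare argmax predictions instead of rounded logits:

with torch.no_grad():
    logits_batch, recon_batch = model(x_data)
    print(logits_batch)                        # raw, unnormalized scores per class
    preds = torch.argmax(logits_batch, dim=1)  # predicted class indices
    print(preds, y_data.view(-1))              # compare predictions to the targets
    acc = (preds == y_data.view(-1)).float().mean()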

hi @ptrblck, thanks for your reply. I found out that my issue is with the architecture itself and not with inference. I have a simple encoder-decoder model and I am trying to add a softmax classifier layer on top of the encoder so that I can optimize the classification and reconstruction losses jointly.

class Autoencoder(nn.Module):
    def __init__(self, D_in, H=50, H2=12, latent_dim=7):
        super(Autoencoder, self).__init__()

        # Encoder
        self.linear1 = nn.Linear(D_in, H)
        self.lin_bn1 = nn.BatchNorm1d(num_features=H)
        self.linear2 = nn.Linear(H, H2)
        self.lin_bn2 = nn.BatchNorm1d(num_features=H2)
        self.linear3 = nn.Linear(H2, H2)
        self.lin_bn3 = nn.BatchNorm1d(num_features=H2)
        self.num_class = 3

        self.fc1 = nn.Linear(H2, latent_dim)
        self.fc3 = nn.Linear(latent_dim, latent_dim)
        self.fc4 = nn.Linear(latent_dim, H2)

        # Decoder
        self.linear4 = nn.Linear(H2, H2)
        self.lin_bn4 = nn.BatchNorm1d(num_features=H2)
        self.linear5 = nn.Linear(H2, H)
        self.lin_bn5 = nn.BatchNorm1d(num_features=H)
        self.linear6 = nn.Linear(H, D_in)
        self.lin_bn6 = nn.BatchNorm1d(num_features=D_in)

        self.relu = nn.ReLU()
        self.classifier = nn.Linear(latent_dim, self.num_class)
        # self.classifier = nn.Softmax()

    def encode(self, x):
        lin1 = self.relu(self.lin_bn1(self.linear1(x)))
        lin2 = self.relu(self.lin_bn2(self.linear2(lin1)))
        lin3 = self.relu(self.lin_bn3(self.linear3(lin2)))

        fc1 = F.relu(self.fc1(lin3))
        return fc1

    def decode(self, z):
        fc3 = self.relu(self.fc3(z))
        fc4 = self.relu(self.fc4(fc3))

        lin4 = self.relu(self.lin_bn4(self.linear4(fc4)))
        lin5 = self.relu(self.lin_bn5(self.linear5(lin4)))
        return self.lin_bn6(self.linear6(lin5))

    def forward(self, x):
        # previously: mu, logvar = self.encode(x)
        z = self.encode(x)
        logits = self.classifier(z)
        reconstruction = self.decode(z)
        # self.decode(z) later becomes recon_batch; mu is mu and logvar is logvar
        # previously: return self.decode(z), mu, logvar
        return logits, reconstruction

class customLoss(nn.Module):
    def __init__(self):
        super(customLoss, self).__init__()
        self.mse_loss = nn.MSELoss(reduction="sum")
        self.classification_criterion = nn.CrossEntropyLoss()

    # x_recon is the recon_batch created in the model's forward, x is the original x batch, mu is mu and logvar is logvar
    # previously: def forward(self, x_recon, x, mu, logvar):
    def forward(self, x_recon, x, logits, targets):
        loss_MSE = self.mse_loss(x_recon, x)

        # previously: loss_KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        classification_loss = self.classification_criterion(logits, torch.max(targets, 1)[1])

        return loss_MSE + classification_loss

def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, data in enumerate(trainloader):
        x_train = data[0].to(device)
        y_train = data[1].to(device)

        # print("x train:", x_train.shape)
        print("y train:", y_train)

        optimizer.zero_grad()
        # previously: recon_batch, mu, logvar = model(data)
        # previously: loss = loss_mse(recon_batch, data, mu, logvar)

        logits, recon_batch = model(x_train)
        print("logits train:", logits)

        _, predicted = torch.max(logits.data, 1)
        print("predicted:", predicted)

        loss = loss_mse(recon_batch, x_train, logits, y_train)

        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    if epoch % 200 == 0:
        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(trainloader.dataset)))
        train_losses.append(train_loss / len(trainloader.dataset))

When I print them, the outputs that go into the CE loss are not probabilities, and the prediction is always the same class (there are 3 classes in total).

logits train: tensor([[ 0.4869, -0.6447, -0.6673],
[ 0.8050, -1.0218, -0.6015],
[ 0.4048, -0.5664, -0.5431],
[ 0.8462, -1.0309, -0.6237]], grad_fn=)
predicted: tensor([0, 0, 0, 0])
y train: tensor([[0],
[2],
[2],
[1]])
logits train: tensor([[ 0.4088, -0.5317, -0.2215],
[ 0.7658, -0.9260, -0.5844],
[ 0.7539, -0.9134, -0.5994],
[ 0.4659, -0.6436, -0.7550]], grad_fn=)
predicted: tensor([0, 0, 0, 0])
y train: tensor([[0],
[0],
[1],
[1]])

Can you please tell me what I am doing wrong here? I expected the outputs to be probabilities, and why is it always predicting the same class? Thank you so much!

That's not the case, since nn.CrossEntropyLoss expects raw logits (which are unbounded) and will apply F.log_softmax internally.
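
If you want to inspect probabilities for debugging purposes, you can apply softmax outside of the loss calculation; a minimal sketch, assuming logits is the [batch_size, 3] output of your classifier head:

import torch
import torch.nn.functional as F

probs = F.softmax(logits, dim=1)        # each row now sums to 1
print(probs)
predicted = torch.argmax(probs, dim=1)  # same result as taking argmax of the raw logits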

Your model seems to be overfitting to class 0, so check if this is the majority class and, if so, use weighted sampling or a weighted loss to counter this effect.
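
A minimal sketch of both options, assuming train_targets is a 1D tensor of class indices for the 3 classes (the name train_targets is just for illustration; the rest reuses names from your code):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler

# inverse-frequency weights for the 3 classes
class_counts = torch.bincount(train_targets, minlength=3).float()
class_weights = class_counts.sum() / (3.0 * class_counts)

# option 1: weighted loss
criterion = nn.CrossEntropyLoss(weight=class_weights)

# option 2: weighted sampling, so each batch is roughly balanced
sample_weights = class_weights[train_targets]
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)
trainloader = DataLoader(dataset=data_set, batch_size=4, sampler=sampler)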

hi @ptrblck, thank you for the reply. I was able to solve the issue.