Losses and Accuracies are zero after first epoch


I am using PyTorch for multiclass classification. I’ve one hot encoded the labels. On running the model I am getting train and validation losses and accuracies for first epoch but for all the next epochs their value is coming out to be 0. Also I’m not using a dataloader, instead I’m using a function of my own to load the data in batches. I’m a beginner please guide me.

def train_model(model, criterion, optimizer, num_epochs=10):

for epoch in range(num_epochs):
print(‘Epoch {}/{}’.format(epoch+1, num_epochs))
print(’-’ * 10)

   for phase in ['train', 'val']:
        if phase == 'train':
        running_loss = 0.0
        running_corrects = 0.0
        for i,(inputs, labels) in enumerate(dataloaders[phase]):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _,lab1 = torch.max(labels.data, 1)
            loss = criterion(outputs, labels)
            if phase == 'train':

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == lab1)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = float(running_corrects/dataset_sizes[phase])

        print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
return model
device = torch.device("cpu")

model_ft = models.resnet50(pretrained=True).to(device)

for param in model_ft.parameters():
param.requires_grad = False

model_ft.fc = nn.Sequential(
    nn.Linear(2048, 1000),
     nn.Linear(1000, 136)).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_ft.fc.parameters(), lr=0.001)    

model_ft = train_model(model_ft, criterion, optimizer, num_epochs=10)

One-hot encoded targets wouldn’t work with nn.CrossEntropyLoss, so I’m not sure why

loss = criterion(outputs, labels)

doesn’t raise an error.
Could you print the shapes of both tensors before passing them to the criterion?

Also, check the shapes and values of epoch_loss and epoch_acc to avoid accidental broadcasting etc.

How did you define dataloaders, if you are not using the built-in DataLoader class?

Thanks for the reply!
I’m using

loss = criterion(outputs, lab1)

not (outputs, labels). Sorry for the typo.

outputs = torch.Size([16,136])
lab1 = torch.Size([16])

epoch_loss = 43449.26647949219
epoch_acc.shape = torch.Size([])
train Loss: 3.4776 Acc: 0.1683

epoch_loss = 30073.398727416992
epoch_acc.shape = torch.Size([])
val Loss: 3.3855 Acc: 0.1740

I’m using the following function for dataloading:

def image_generator(file, label_file, batch_size = 16):
    size = len(file)
    for i in range(math.floor(size/batch_size)):
        batch_x = file[new_bs:(new_bs+batch_size)]
        batch_y = label_file[new_bs:(new_bs+batch_size)]
        new_bs += batch_size
        yield(batch_x, batch_y)
dataloaders = {'train': image_generator(X_train, y_train, batch_size = 16), 
                 'val': image_generator(X_val, y_val, batch_size = 16)

Here in image_generator function, X_train is a tensor and y_train is a NumPy array.
If there’s any other better method for dataloading please suggest, as I have all the images in single folder and I can’t use ImageFolder.

Hy DL_jain!

Loss is calculated between original labels and your output probability. So your loss should be
loss = criterion(output,labels) as mentioned by ptrblck

Hey, thanks for the reply!
I’m getting this error on running loss = criterion(output,labels):

RuntimeError: 1D target tensor expected, multi-target not supported

Also @ptrblck is right in saying in that loss = criterion(output, labels) will not work since labels are one-hot encoded. This is because the target in CrossEntropyLoss only accepts 1D tensor.

Thanks for the update.
Could you print the preds and lab1 tensors for the second epoch, which yields a zero accuracy and zero loss?
It would be interesting to see, how these values both can be calculated using the current code.

Can you help me @ptrblck, my program generate zero loss to in first epoch

def train_dataset(train_loader, val_loader, drivePath, type):

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
    model = None
    if (type == 'custom'):
        model = VGG16_CUSTOM_NET()
        model = VGG16_ORI_NET() 
    print("[INFO] reset the weight before starting")
    model = model.to(device=device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= lr) 

    for epoch in range(num_epochs): #I decided to train the model for 50 epochs
        loss_var = 0
        for idx, (images, labels) in enumerate(train_loader):
            images = images.to(device=device)
            labels = labels.to(device=device)
            ## Forward Pass
            scores = model(images)
            loss = criterion(scores,labels)
            loss_var += loss.item()
            print(f'Epoch [{epoch+1}/{num_epochs}] || Step [{idx+1}/{len(train_loader)}] || Loss:{loss_var/len(train_loader)} || Loss Item:{loss.item()}')
        print(f"Loss at epoch {epoch+1} || {loss_var/len(train_loader)}")

        with torch.no_grad():
            correct = 0
            samples = 0
            for idx, (images, labels) in enumerate(val_loader):
                images = images.to(device=device)
                labels = labels.to(device=device)
                outputs = model(images)
                _, preds = outputs.max(1)
                correct += (preds == labels).sum()
                samples += preds.size(0)
            print(f"accuracy {float(correct) / float(samples) * 100:.2f} percentage || Correct {correct} out of {samples} samples")
class VGG16_ORI_NET(nn.Module):
    def __init__(self):
        super(VGG16_ORI_NET, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)

        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)

        self.conv8 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.conv9 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.conv10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)

        self.conv11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc14 = nn.Linear(25088, 4096)
        self.fc15 = nn.Linear(4096, 4096)
        self.fc16 = nn.Linear(4096, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.maxpool(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = self.maxpool(x)
        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = F.relu(self.conv10(x))
        x = self.maxpool(x)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = self.maxpool(x)
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc14(x))
        x = F.dropout(x, 0.5) #dropout was included to combat overfitting
        x = F.relu(self.fc15(x))
        x = F.dropout(x, 0.5)
        x = self.fc16(x)
        return x

If you are concerned about the zero loss during training, check the output of your model and the corresponding labels to see if the zero loss is expected. Maybe your model is overfitting to a single class and the current batch labels contain only this particular class.

Thank you for your reply.
I will check my model output and the corresponding labels