About resnet with batchsize=1,the val result only gives 1label

I tried to use resnet to train my dataset, after training,when i test the accuracy, i’ve found that when i set batchsize =1,the predicted labels are always“1”, but when i set batchsize =10, it seems right.
codes are as follows:
import os
import torch.nn as nn
import torch.utils.data
import torchvision
from torch.autograd import Variable
from torchvision import transforms
import torchvision.models as models
import time
import warnings
import sys
import matplotlib.pyplot as plt
import traceback
import logging

num_epochs = 1
batch_size = 1

#model = models.alexnet(pretrained=False)
model = models.resnet50(pretrained=False)
#for parma in model.parameters():

parma.requires_grad = False

model.fc = nn.Linear(2048,3)
#for index, parma in enumerate(model.classifier.parameters()):

if index == 6:

#model.parma.requires_grad = True
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(“model”, model)


#load data
data_transform = transforms.Compose([
transforms.Scale((224, 224), 2),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[
0.229, 0.224, 0.225])
train_dataset = torchvision.datasets.ImageFolder(root=’/home/zhao/PER-DER/Label_training/’,transform=data_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=40, shuffle=True,num_workers=0)

val_dataset = torchvision.datasets.ImageFolder(root=’/home/zhao/PER-DER/class_val/’, transform=data_transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=10, shuffle=True,num_workers=0)
print(“load dataset done”)

#training data
epoch = 0
x_epoch = []
y_acc = []

for epoch in range(num_epochs):
batch_size_start = time.time()
running_loss = 0.0
for i, (inputs,labels) in enumerate(train_loader):
inputs = Variable(inputs).cuda()
labels = Variable(labels).cuda()
outputs = model(inputs)
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, labels)
running_loss += loss.item()

        print('Epoch [%d/%d], Loss: %.4f,need time %.4f'
                  % (epoch+1, num_epochs,  running_loss / (20000 / batch_size), time.time() - batch_size_start))

    torch.save(model, 'alexnet_model.pkl')
    print('save the training model')
    correct = 0
    total = 0
    for j, (images,labels) in enumerate(val_loader):
        batch_size_start = time.time()
        images = Variable(images).cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        print (predicted)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum()
    print(" Val BatchSize cost time :%.4f s" % (time.time() - batch_size_start))
    print('Test Accuracy of the model on the %d Val images: %.4f' % (total, float(correct) / total))
    Acc = round((float(correct) / total), 3)

print(“training finish”)
torch.save(model, ‘resnet50_model.pkl’)
print(‘save the training model’)

Hello! You probably forgot to put the model in eval() mode. train() mode and batch size of 1 doesn’t make sense for resnet that uses batch norm

1 Like

Your words solved my confusion.Thanks