Confusing Accuracy of ResNet18 and ResNeXt50

Hey folks,

I tried to train a ResNet18 and a ResNeXt50 on two different data sets with 20 classes each. Surprisingly, on the first data set I get nearly the same accuracy for both networks after optimisation (early stopping is implemented). On the second data set, the accuracy of the ResNeXt50 is even lower. In addition, ResNet18 performed better on the second data set than on the first one, which was expected.

So I am not sure whether my customised standard ResNeXt50 is implemented correctly. Do I have any bugs in my code? Is there a bug in the implementation of resnext50? Or is there another logical explanation, e.g. that the ResNeXt50 nearly memorises the training images, so that the test images, which the network has never seen, are too different from the training and validation images?

"""
1) Imports, device and hyper-parameters
"""
import os
import time

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


torch.cuda.empty_cache()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# define hyper-parameters
num_epochs = 10
learning_rate = 0.001
batchsize = 8


# initialization of the weights with the Kaiming uniform distribution
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        

resnext50 = torch.hub.load('pytorch/vision:v0.6.0', 'resnext50_32x4d', pretrained=False)
resnext50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)    # first layer for 1-channel (binary/grayscale) input
resnext50.fc = nn.Linear(2048, 20)    # last layer for 2048 in_features and 20 classes
resnext50.apply(init_weights)    # Kaiming init applied after the layer swaps, so conv1 and fc are initialised too

resnext50.to(device)
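
# optional sanity check of the modified architecture (catches shape bugs early):
# with torch.no_grad():
#     out = resnext50(torch.randn(1, 1, 350, 350, device=device))
#     assert out.shape == (1, 20)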


train_logs = {'loss' : [], 'accuracy' : [], 'time' : []}
val_logs = {'loss' : [], 'accuracy' : [], 'time' : []}
cl_lr = {'loss' : [], 'lr' : []}


"""
2) Definition of directories
"""
# dir_train = 'D:/Benutzer/ml394/RPs/train_resized/'
# dir_test = 'D:/Benutzer/ml394/RPs/test_resized/'
dir_train = 'D:/Benutzer/ml394/DPs/train_resized/'
dir_test = 'D:/Benutzer/ml394/DPs/test_resized/'
dir_val = 'D:/Benutzer/ml394/DPs/val_resized/'


imgs = os.listdir(dir_train)


"""
3) Define costum dataset 
"""
class DPDataset(Dataset):
    # file names are assumed to look like 'MG<group>_AFl<class>_*.mat'
    def __init__(self, imgs, mode, transforms=None):
        self.imgs = imgs
        self.mode = mode
        self.transforms = transforms    # note: currently never applied in __getitem__
        
    def __getitem__(self, idx):
        image_name = self.imgs[idx]
        
        if self.mode == 'train' or self.mode == 'val':
            # label = image_name.split('_')[1]    # extract AFl class
            # label = int(label.split('AFl')[1])
            label = image_name.split('_')[0]    # extract macro group
            label = int(label.split('MG')[1])
            label = torch.tensor(label, dtype=torch.long)    # convert into tensor
            
            # val images are also read from dir_train, since the val split
            # comes from the listing of dir_train
            img = sio.loadmat(dir_train + image_name)    # open mat file
            # img = img['RP']    # extract RP
            img = img['Dist']    # extract DP
            img = img.reshape(1, 350, 350)    # reshape to 1 x 350 x 350 (single channel)
            img = torch.ByteTensor(img)
            img = img.type(torch.FloatTensor)    # convert to FloatTensor
            
            return img, label
        
        elif self.mode == 'test':
            label = image_name.split('_')[1]    # extract AFl class
            label = int(label.split('AFl')[1])
            # label = image_name.split('_')[0]    # extract macro group
            # label = int(label.split('MG')[1])
            label = torch.tensor(label, dtype=torch.long)    # convert into tensor
            
            img = sio.loadmat(dir_test + image_name)    # open mat file
            # img = img['RP']    # extract RP
            img = img['Dist']    # extract DP
            img = img.reshape(1, 350, 350)    # reshape to 1 x 350 x 350 (single channel)
            img = torch.ByteTensor(img)
            img = img.type(torch.FloatTensor)    # convert to FloatTensor
            
            return img, label
        
    def __len__(self):
        return len(self.imgs)
    
    
"""
4) Definition of hyper-parameters and the helper functions train_transform, 
   update_lr and accuracy
"""
# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnext50.parameters(), lr=learning_rate)
# optimizer = torch.optim.SGD(resnext50.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)


# transformations of the images (resize, convert to tensor and normalize)
# note: this transform is never applied in DPDataset.__getitem__, and the
# ImageNet mean/std below assume 3 channels, while the input here has one
def train_transform():
    return transforms.Compose([transforms.Resize(350), transforms.ToTensor(),
                               transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                    std=[0.229, 0.224, 0.225])])


# for updating learning rate to implement decay
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


# calculating the accuracy of the predictions compared to the ground truth
def accuracy(outputs, trues):
    # store 1 in acc for a correct prediction, 0 otherwise
    _, predicted = torch.max(outputs.data, 1)
    acc = [1 if predicted[i] == trues[i] else 0 for i in range(len(predicted))]
    
    # sum up all correct predictions and divide by the number of predictions
    acc = np.sum(acc) / len(predicted)
    
    return acc * 100
   
 
"""
5) Create data loaders by calling data set constructors
"""
train_imgs, val_imgs = train_test_split(imgs, test_size=0.125)
# train_imgs = (os.listdir(dir_train))
# val_imgs = (os.listdir(dir_val))
test_imgs = (os.listdir(dir_test))

train_dataset = DPDataset(train_imgs, mode='train', transforms=train_transform())
val_dataset = DPDataset(val_imgs, mode='val', transforms=train_transform())
test_dataset = DPDataset(test_imgs, mode='test', transforms=train_transform())

train_data_loader = DataLoader(dataset=train_dataset, num_workers=0,
                               batch_size=batchsize, shuffle=True)
val_data_loader = DataLoader(dataset=val_dataset, num_workers=0,
                             batch_size=batchsize, shuffle=True)
test_data_loader = DataLoader(dataset=test_dataset, num_workers=0,
                              batch_size=batchsize, shuffle=True)


"""
6) Definition of the training function to train the model for one epoch
"""
total_step = len(train_data_loader)
curr_lr = learning_rate

def train_one_epoch(train_data_loader):
    resnext50.train()    # ensure BatchNorm/Dropout are in training mode
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()
    
    for i, (images, labels) in enumerate(train_data_loader):
        images = images.to(device)
        # subtract 1 from every label, label indices should start at 0
        labels = labels - 1
        labels = labels.to(device)

        # forward passing
        optimizer.zero_grad()
        outputs = resnext50(images)
        
        # calculating accuracy and loss based on the criterion
        _loss = criterion(outputs, labels)
        loss = _loss.item()
        epoch_loss.append(loss)
        acc = accuracy(outputs, labels)
        epoch_acc.append(acc)

        # backward passing and optimizing
        _loss.backward()
        optimizer.step()
        # torch.cuda.empty_cache()
        
    # time of one epoch
    end_time = time.time()
    total_time = end_time - start_time
    
    # accuracy and loss during one epoch
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)
    
    # storing results to logs
    train_logs['loss'].append(epoch_loss)
    train_logs['accuracy'].append(epoch_acc)
    train_logs['time'].append(total_time)
    
    return epoch_loss, epoch_acc, total_time


"""
7) Definition of the validation function to validate the model after one epoch
"""
def val_one_epoch(val_data_loader, best_val_acc):
    resnext50.eval()    # switch BatchNorm/Dropout to evaluation mode
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()

    for i, (images, labels) in enumerate(val_data_loader):
        images = images.to(device)
        # subtract 1 from every label, label indices should start at 0
        labels = labels - 1
        labels = labels.to(device)

        # forward passing (no gradients needed during validation)
        with torch.no_grad():
            outputs = resnext50(images)
        
        # calculating accuracy and loss based on the criterion
        _loss = criterion(outputs, labels)
        loss = _loss.item()
        epoch_loss.append(loss)
        acc = accuracy(outputs, labels)
        epoch_acc.append(acc)
        # torch.cuda.empty_cache()
            
    # time of one epoch  
    end_time = time.time()
    total_time = end_time - start_time
    
    # accuracy and loss during one epoch
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)
    
    # storing results to logs
    val_logs['loss'].append(epoch_loss)
    val_logs['accuracy'].append(epoch_acc)
    val_logs['time'].append(total_time)
    
    # saving best model
    if epoch_acc > best_val_acc:
        best_val_acc = epoch_acc
        torch.save(resnext50.state_dict(),'resnext50_best.pth')
        
    return epoch_loss, epoch_acc, total_time, best_val_acc
        

"""
8) Train and validate the model over the number of epochs
"""
best_val_acc = 0
for epoch in range(num_epochs):
    
    # training by calling the function 'train_one_epoch'
    loss, acc, _time = train_one_epoch(train_data_loader)
    
    # print epoch details
    print('\nTraining')
    print ("Epoch [{}/{}], Loss: {:.4f}, Acc: {:.4f}, Time: {:.4f}" 
                    .format(epoch+1, num_epochs, 
                            loss.item(), acc.item(), round(_time, 4)))
    
    # validate by calling the function 'val_one_epoch'
    loss, acc, _time, best_val_acc = val_one_epoch(val_data_loader, best_val_acc)
    
    # print epoch details
    print('\nValidating')
    print ("Epoch [{}/{}], Loss: {:.4f}, Acc: {:.4f}, Time: {:.4f}" 
                    .format(epoch+1, num_epochs, 
                            loss.item(), acc.item(), round(_time, 4)))
    
    # step decay of the learning rate
    if (epoch + 1) % 2 == 0:
        curr_lr /= 2
        update_lr(optimizer, curr_lr)
    # elif epoch % 4 == 0:
    #     curr_lr /= 1000
    #     update_lr(optimizer, curr_lr)
        
    # exponential decay of the learning rate
    # curr_lr *= math.exp(-0.01*epoch)
    # update_lr(optimizer, curr_lr)
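    
    # note: the same step decay could be written with PyTorch's built-in
    # scheduler (a sketch; 'scheduler' would be created once before the loop):
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
    # scheduler.step()    # called once per epoch instead of update_lr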
    
    
"""
9) Load the best model and test it. Print the accuracy of the prdictions in percentage.
"""
resnext50_best = torch.hub.load('pytorch/vision:v0.6.0', 'resnext50_32x4d', pretrained=False)
resnext50_best.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)    # first layer for binary/gray jpg input
resnext50_best.fc = nn.Linear(2048, 20)    # last layer for 2048 in_features and 20 classes
# load the saved model
resnext50_best.load_state_dict(torch.load('C:/Users/ml394w/ResNet/resnext50_best.pth'))

resnext50_best.to(device)
resnext50_best.eval()
labels_list = []
prediction_list = []
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_data_loader:
        images = images.to(device)
        # subtract 1 from every label, label indices should start at 0
        labels = labels-1
        labels = labels.to(device)
        
        outputs = resnext50_best(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        prediction_list.extend(predicted.cpu().numpy())
        labels_list.extend(labels.cpu().numpy())

    print('\nAccuracy of the model on the test images: {} %'.format(100*correct/total))
    
    # print the classification report
    print(classification_report(labels_list, prediction_list))
    
    
"""
10) Plot the results
"""
# plot the loss in a diagram
plt.title('Loss')
plt.plot(np.arange(1, num_epochs+1, 1), train_logs['loss'], color = 'blue', label = 'train')
plt.plot(np.arange(1, num_epochs+1, 1), val_logs['loss'], color = 'green', label = 'val')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
            ncol=2, borderaxespad=0.)
plt.savefig('ResNeXt50_RPs_h8_Adam_epochs_10_lr_0.001_step_2_2_bs_8_loss_mg.png')
plt.show()

# plot the accuracy in a diagram
plt.title('Accuracy')
plt.plot(np.arange(1, num_epochs+1, 1), train_logs['accuracy'], color = 'blue', label = 'train')
plt.plot(np.arange(1, num_epochs+1, 1), val_logs['accuracy'], color = 'green', label = 'val')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
            ncol=2, borderaxespad=0.)
plt.savefig('ResNeXt50_RPs_h8_Adam_epochs_10_lr_0.001_step_2_2_bs_8_acc_mg.png')
plt.show()


Any help is appreciated. Thanks in advance.

What I figured out right now is that my customised ResNeXt50 collapses to predicting only the class with the most observations. This means that if I trained the network on three classes A, B and C, and the test set contained class A (30 observations), B (20 observations) and C (50 observations), the result would be an accuracy of 50%, since the network always decides to go for class C.
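A quick way to confirm this kind of collapse is to look at the histogram of the predicted classes on the test set; this sketch reuses the prediction_list collected in the test loop above:

from collections import Counter

# count how often each class index was predicted;
# a collapsed model puts (almost) all of its mass on a single class
print(Counter(int(p) for p in prediction_list).most_common())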

To counter overfitting on the majority class you could use a weighted loss function (e.g. via the weight argument of nn.CrossEntropyLoss), or you could apply weighted sampling via WeightedRandomSampler, as given in this example, to balance the data set.
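A minimal sketch of both options, assuming a hypothetical train_labels list that holds the 0-based class index of every training image (this list is not built in the code above):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler

labels_t = torch.tensor(train_labels)                  # 0-based class index per training image
class_counts = torch.bincount(labels_t, minlength=20)  # images per class

# option 1: weight the loss inversely to the class frequencies
class_weights = 1.0 / class_counts.float()
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

# option 2: oversample rare classes so each batch is roughly balanced
sample_weights = class_weights[labels_t]               # one weight per training sample
sampler = WeightedRandomSampler(sample_weights, num_samples=len(labels_t),
                                replacement=True)
train_data_loader = DataLoader(train_dataset, batch_size=batchsize, sampler=sampler)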

Thanks for your reply. I have a data set containing 20 classes, and each class is represented more or less equally, so I think this would not work out.

I already figured out that the network is not working on one of the two data sets. I thought the optimiser might be stuck in a local minimum of the loss function, so I tried really low learning rates. However, the behaviour on that data set remained the same, only with a much slower convergence of the loss. Thus I guess the data set is not working with this network at all. Maybe it is too complex?
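For reference, a sketch of how the matching ResNet18 baseline could be set up with the same modifications (the exact baseline code is an assumption here); note that resnet18's classifier has 512 in_features instead of 2048:

resnet18 = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=False)
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)    # 1-channel input
resnet18.fc = nn.Linear(512, 20)    # resnet18 ends in 512 features, not 2048
resnet18.to(device)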