Accuracy over 100%

I am using OpenFL, the Intel framework for Federated Learning.
If I run their tutorial example, I have that loss decreases and accuracy is in range 0-100%, like this:

[16:21:05] METRIC   Round 4, collaborator env_one train result train_loss:  3.083468                                                           experiment.py:112
[16:21:29] METRIC   Round 4, collaborator env_one localy_tuned_model_validate result acc:   0.640100                                           experiment.py:112
[16:21:53] METRIC   Round 4, collaborator env_one aggregated_model_validate result acc:     0.632200                                           experiment.py:112
           METRIC   Round 4, collaborator Aggregator localy_tuned_model_validate result acc:        0.640100                                   experiment.py:112
           METRIC   Round 4, collaborator Aggregator aggregated_model_validate result acc:  0.632200                                           experiment.py:112
[16:21:58] METRIC   Round 4, collaborator Aggregator train result train_loss:       3.083468 

So basically, an accuracy of 64%.
The problem is when I run my experiments. From their code I changed only the dataset (I am using MNIST) and the neural network. My neural network is the following:

class VGG16(nn.Module):

    def __init__(self, num_classes):
        super(VGG16, self).__init__()

        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      # (1(32-1)- 32 + 3)/2 = 1
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )
        
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=128,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_4 = nn.Sequential(
            nn.Conv2d(in_channels=256,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )            

        self.classifier = nn.Sequential(
            nn.Linear(out_features, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.65),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.65),
            nn.Linear(4096, num_classes) 
        )

        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
                nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
#                 nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    m.bias.detach().zero_()

        # self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

    def forward(self, x):

        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

And I pass 10 as number of classes:

# Create the model
model = VGG16(10)
output = model(test_x)
output.shape
  torch.Size([1, 10])

If I run this, I obtain accuracies like this:

[15:22:51] METRIC   Round 0, collaborator env_one train result train_loss:  0.741962                                                           experiment.py:112
[15:22:54] METRIC   Round 0, collaborator env_one localy_tuned_model_validate result acc:   13.395700                                          experiment.py:112
[15:22:57] METRIC   Round 0, collaborator env_one aggregated_model_validate result acc:     12.433300                                          experiment.py:112
           METRIC   Round 0, collaborator Aggregator aggregated_model_validate result acc:  12.433300                                          experiment.py:112
[15:23:03] METRIC   Round 0, collaborator Aggregator train result train_loss:       0.741962                                                   experiment.py:112
           METRIC   Round 0, collaborator Aggregator localy_tuned_model_validate result acc:        13.395700 

Loss is correct, while accuracies are out of range.
My loss:

def cross_entropy(output, target):
    """Binary cross-entropy metric
    """
    #return F.cross_entropy(input=output,target=target)
    #return F.binary_cross_entropy_with_logits(input=output,target=target)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(output, target)
    return loss

My validation function:

@task_interface.register_fl_task(model='net_model', data_loader='val_loader', device='device')     
def validate(net_model, val_loader, device):
    torch.manual_seed(0)
    device = torch.device('cuda')
    net_model.eval()
    net_model.to(device)
    
    val_loader = tqdm.tqdm(val_loader, desc="validate")
    val_score = 0
    total_samples = 0

    with torch.no_grad():
        for data, target in val_loader:
            samples = target.shape[0]
            total_samples += samples
            data, target = torch.tensor(data).to(device), \
                torch.tensor(target).to(device, dtype=torch.int64)
            output = net_model(data)
            pred = output.argmax(dim=1,keepdim=True)
            val_score += pred.eq(target).sum().cpu().numpy()
            
    return {'acc': val_score / total_samples,}

I solved in this way:

#original
#pred = output.argmax(dim=1,keepdim=True)
            
#my solution
_, pred = torch.max(output, dim=1)

I do not know why, buy my solution it works. If someone has an intuition can explain me why this works? Thanks

The difference seems to be the lack of the keepdim=True argument so I guess an unwanted broadcasting was applied somewhere in the code and thus pushed the accuracy out of bounds.

1 Like

Probably the problem was that