Dropout at test-time for uncertainty estimation

johnvalen1 · March 22, 2020, 11:48pm

Hello,

I’m trying to use dropout at test time with a neural network trained on MNIST in order to measure input-specific uncertainty. I do this by feeding in a single test-set image and having T models (each defined by a fresh dropout mask, i.e. T stochastic forward passes) make predictions; I then calculate the variance of the T predictive probabilities for that image’s class.

The problem is that when I make my predictions, all the T models output identical predictive probabilities for each of the 10 classes. I’m pretty sure that drop-out isn’t kicking in at test time. Can you please help?

I’m using Google Colab with PyTorch version 1.4.0, which is the version Colab provides automatically when I call ‘import torch’.

This is my network:

# Settings shared by the network definition, the data loaders and the
# training loop.
hyperparams = {
    'l1_out': 512,     # output width of the first linear layer
    'l2_out': 512,     # output width of the second linear layer
    'l1_drop': 0.5,    # dropout probability after the first layer
    'l2_drop': 0.5,    # dropout probability after the second layer
    'batch_size': 32,
    'epochs': 10,
}

class Net(nn.Module):
    """Three-layer fully connected classifier for flattened 28x28 images.

    Layer widths and dropout probabilities are read from the module-level
    ``hyperparams`` dict; the final layer has 10 outputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, hyperparams['l1_out'])
        # NOTE(review): dropout1 and dropout2 are created here but forward()
        # below never calls them, so no dropout is applied in any mode.
        self.dropout1 = nn.Dropout(hyperparams['l1_drop'])
        self.fc2 = nn.Linear(hyperparams['l1_out'], hyperparams['l2_out'])
        self.dropout2 = nn.Dropout(hyperparams['l2_drop'])
        self.fc3 = nn.Linear(hyperparams['l2_out'], 10)

    def forward(self, x):
        """Return log-softmax scores for ``x`` (last dimension must be 28*28)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x) # output is log-probabilities; exponentiate it to retrieve predicted probabilities

Now, I’ve decided to take T = 3, and to input a specific image. I’m also using two functions from two different sources right here on this forum, apply_dropout and enable_dropout.

# MNIST data loaders. Images are converted to tensors, both loaders shuffle,
# and the batch size comes from `hyperparams`. Only the training dataset is
# created with download=True.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='mnist-data/',
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    ),
    batch_size=hyperparams['batch_size'],
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='mnist-data/',
        train=False,
        transform=transforms.Compose([transforms.ToTensor()]),
    ),
    batch_size=hyperparams['batch_size'],
    shuffle=True,
)


#----------------------------------------------TRAIN ----------------------------------------------
net_trained = Net()

# create an Adam optimizer
optimizer = optim.Adam(net_trained.parameters(), lr=0.001)
# Net.forward() already returns log_softmax output, so pair it with NLLLoss
# rather than CrossEntropyLoss, which would apply log_softmax a second time.
criterion = nn.NLLLoss()

epochs = hyperparams['epochs']

# run the main training loop
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors are fed to the model directly; the Variable wrapper is
        # deprecated and no longer needed.
        # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
        data = data.view(-1, 28 * 28)
        optimizer.zero_grad()
        net_out = net_trained(data)
        loss = criterion(net_out, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 500 == 0:
            # loss.item() extracts the Python float instead of going through
            # the legacy .data attribute.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

# Persist the trained weights to the file named by `f`.
f = 'model'
torch.save(net_trained.state_dict(), f)

#----------------------------CHOOSE RANDOM IMAGE----------------------------------------------
# test_loader shuffles, so its first batch is already a random draw. Take that
# batch directly instead of iterating over the whole test set just to keep
# the last one.
test_images, test_labels = next(iter(test_loader))
sample_image = test_images[0]
sample_label = test_labels[0]

T = 3 # how many times to apply drop-out at test time

def apply_dropout(m):
    """Put every dropout layer inside ``m`` into training mode and return ``m``.

    Layers are recognised by a class name starting with ``'Dropout'``. The
    whole module tree is walked with ``modules()`` (``m`` itself included),
    so dropout layers nested inside sub-containers are reached too; walking
    only ``children()`` would miss anything deeper than one level. All other
    layers keep their current mode.

    Args:
        m: module exposing ``modules()``, e.g. an ``nn.Module``.

    Returns:
        The same object ``m``, to allow chaining.
    """
    for each_module in m.modules():
        if each_module.__class__.__name__.startswith('Dropout'):
            each_module.train()
    return m

def enable_dropout(m):
    """Switch all dropout layers found anywhere in ``m`` to training mode.

    A layer counts as dropout when its class name begins with ``'Dropout'``.
    Nothing is returned; other layers are left in whatever mode they had.
    """
    dropout_layers = (
        layer for layer in m.modules()
        if type(layer).__name__.startswith('Dropout')
    )
    for layer in dropout_layers:
        layer.train()
def uncertainties(p):
    """Split sampled probabilities into aleatoric and epistemic parts.

    Args:
        p: array-like of sampled probabilities; axis 0 indexes the samples.
            Plain Python lists are accepted as well as NumPy arrays.

    Returns:
        Tuple ``(aleatoric, epistemic)``, both reduced over axis 0:
        ``aleatoric`` is the mean of ``p * (1 - p)`` and ``epistemic`` is
        the variance of ``p``.
    """
    p = np.asarray(p)
    aleatoric = np.mean(p * (1 - p), axis=0)
    # np.var equals mean(p**2) - mean(p)**2 but is computed from squared
    # deviations, so rounding cannot push the result below zero.
    epistemic = np.var(p, axis=0)
    return aleatoric, epistemic

#----------------------------PREDICT-------------------------------------------
def predict(model, image, label, T=T):
    """Classify one image and estimate its uncertainty with test-time dropout.

    One forward pass in eval mode gives the reported class. ``T`` further
    passes, with only the dropout layers switched back to training mode,
    give ``T`` sampled probability vectors; the statistics are computed from
    the sampled probabilities of class ``label``.

    Args:
        model: network whose output for a flattened image is exponentiated
            to obtain class probabilities (i.e. it is expected to return
            log-probabilities).
        image: single image tensor; it is flattened before use.
        label: ground-truth class index, as a one-element tensor or an int.
        T: number of stochastic forward passes; must be at least 1.

    Returns:
        Tuple ``(prediction, variance, aleatoric, epistemic, probabilities)``:
        the class index from the eval-mode pass, the variance of the ``T``
        sampled probabilities of class ``label``, the two values returned by
        ``uncertainties`` for those probabilities (in that function's own
        order), and the list of the ``T`` sampled probabilities.

    Raises:
        ValueError: if ``T`` is smaller than 1.

    Note:
        The model is left in eval mode with its dropout layers in training
        mode.
    """
    if T < 1:
        raise ValueError("T must be at least 1, got {}".format(T))

    label = int(label)
    image = image.flatten()

    # Reference prediction: eval mode, so dropout does not randomise it.
    model.eval()
    standard_output = model(image)
    _, standard_prediction = standard_output.max(0)

    # Turn on only the dropout layers; every other layer stays in eval mode.
    enable_dropout(model)

    # One row per stochastic pass, one column per class. exp() converts the
    # model's log-probabilities into probabilities.
    sampled_probs = np.array(
        [model(image).detach().exp().tolist() for _ in range(T)]
    )
    class_probs = sampled_probs[:, label]

    # variance of the sampled probabilities for the ground-truth class
    variance_of_class_specific_predictions = np.var(class_probs)

    # estimate uncertainties (eq. 4 )
    # eq.4 in https://openreview.net/pdf?id=Sk_P2Q9sG
    # see https://github.com/ykwon0407/UQ_BNN/issues/1
    # uncertainties() returns (aleatoric, epistemic) in that order.
    aleatoric, epistemic = uncertainties(class_probs)

    return (
        standard_prediction.item(),
        variance_of_class_specific_predictions,
        np.squeeze(aleatoric),
        np.squeeze(epistemic),
        class_probs.tolist(),
    )


# Run the network trained above; `net_trained` is the only model this script
# defines. The fifth return value is the list of sampled probabilities.
prediction, var_uncertainty, aleatoric, epistemic, p_hats = predict(net_trained, sample_image, sample_label)


print("\n The model predicts: {} \n The ground truth is {}.\n With drop-out at test-time {} times, variance of class-specific predictions across the models is {}. \n Finally, aleatoric and epistemic uncertainties are {} and {}.\n The models give predictive probabilities: {}".format(prediction, sample_label, T, var_uncertainty, aleatoric, epistemic, p_hats))

The output is:
The model predicts: 0
The ground truth is 0.
With drop-out at test-time 3 times, variance of class-specific predictions across the models is 0.0.
Finally, aleatoric and epistemic uncertainties are 0.0 and 0.013912441817748089.
The models give predictive probabilities:
[0.9858884215354919, 0.9858884215354919, 0.9858884215354919]

ptrblck · March 23, 2020, 3:26am

You are defining the nn.Dropout modules, but are never using them in your forward.

Add them via self.dropoutX and it should work.

johnvalen1 · March 23, 2020, 1:39pm

Great, it worked. Thank you kindly!
For anyone else reading this, I modified my network:

class Net(nn.Module):
    """Three-layer fully connected classifier with dropout after each hidden layer.

    Layer widths and dropout probabilities are read from the module-level
    ``hyperparams`` dict; the final layer has 10 outputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, hyperparams['l1_out'])
        self.dropout1 = nn.Dropout(hyperparams['l1_drop'])
        self.fc2 = nn.Linear(hyperparams['l1_out'], hyperparams['l2_out'])
        self.dropout2 = nn.Dropout(hyperparams['l2_drop'])
        self.fc3 = nn.Linear(hyperparams['l2_out'], 10)

    def forward(self, x):
        """Return log-probabilities over the 10 classes.

        Args:
            x: tensor whose last dimension is 28*28 (a flattened image or a
                batch of them).

        Returns:
            Tensor of log-probabilities with last dimension 10; exponentiate
            it to retrieve predicted probabilities.
        """
        # Each dropout layer is applied to the activated output of its
        # hidden layer, so it takes effect whenever it is in training mode.
        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout2(F.relu(self.fc2(x)))
        x = self.fc3(x)
        # Normalise over the class dimension explicitly. Calling log_softmax
        # without `dim` is deprecated and picks the dimension implicitly
        # from the input rank.
        return F.log_softmax(x, dim=-1)

ageryw · September 28, 2020, 9:14am

@ptrblck and @johnvalen1 Thank you both, your explanation helped me with the same problem.