How to get a prediction interval using Monte Carlo Dropout

Hi. I am trying to get the standard deviation of the predicted values in order to describe a prediction interval using Monte Carlo Dropout. The code runs without any errors, but the standard deviations it returns are all zero. I think there is a problem in my dataset or algorithm, so could you give me some advice on how to fix my code?
Here is my code.

import numpy as np
import torch
import torch.utils.data
from torch import nn, optim, utils
from tqdm import tqdm

# Encoder, Encoder1, Encoder2, device, train_idx, test_idx,
# g, seed_worker and plot_prediction are defined elsewhere.

class MyNet(nn.Module):
    def __init__(self, x1, x2, p, m, mode=0):
        super().__init__()
        input_dim = p*m 
        hidden_dim = 128
        self.dropout = nn.Dropout(p=0.1)
        if mode == 1:
            self.embed = Encoder1(x1, x2, p)
        elif mode == 2:
            self.embed = Encoder2(x1, x2, p)
        else:
            self.embed = Encoder(x1, x2, p)

        self.share_block = nn.Sequential(
            # nn.BatchNorm1d(input_dim),
            nn.utils.weight_norm(nn.Linear(input_dim, hidden_dim)),
            # nn.Dropout(p=0.1),
            nn.SELU(),
            # nn.BatchNorm1d(hidden_dim),
            nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
            # nn.Dropout(p=0.1),
            nn.SELU(),
            # nn.BatchNorm1d(hidden_dim),
            nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
            # nn.Dropout(p=0.1),
            nn.SELU(),
            # nn.BatchNorm1d(hidden_dim),
            nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
            # nn.Dropout(p=0.1),
            nn.SELU()
        )
        self.head = nn.Sequential(
            # nn.BatchNorm1d(hidden_dim),
            nn.utils.weight_norm(nn.Linear(hidden_dim, 1))
        )

    def forward(self, x1, x2):
        x1 = x1.to(device)
        x2 = x2.to(device)
        x = self.embed(x1, x2)
        x = self.share_block(x)
        _out = self.head(x)
        _out = self.dropout(_out)
        return _out

    def predict(self, x1, x2, alpha, samples=100):
        x1 = x1.to(device)
        x2 = x2.to(device)
        # z_c = norm.ppf(alpha)
        samples_ = []
        for i in range(samples):
            pred = self.forward(x1, x2).detach()
            samples_ += pred
            
        pred_sample = torch.cat(samples_, dim=-1)
        pred_sample = torch.reshape(pred_sample, (samples, -1))  # one row per MC pass
        pred_mean = torch.mean(pred_sample, dim=0)
        pred_std = torch.std(pred_sample, dim=0)

        return pred_mean, pred_std

def train_and_test_gpu(model, num_epochs, dataset):
    # model.cuda()
    print(model)
    train_dataset = utils.data.Subset(dataset, train_idx)
    train_loader = utils.data.DataLoader(
        train_dataset, batch_size=64, worker_init_fn=seed_worker, generator=g, shuffle=True)

    test_dataset = utils.data.Subset(dataset, test_idx)
    test_loader = utils.data.DataLoader(
        test_dataset, batch_size=64, shuffle=False)

    optimizer = optim.Adam(model.parameters(), lr=1e-6)
    criterion = nn.MSELoss()

    # Note: OneCycleLR is a special scheduler; scheduler.step() is called
    # after every batch, not once per epoch as with most schedulers.
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=1e-3,
        steps_per_epoch=len(train_loader),
        epochs=num_epochs,
    )

    model.train()
    for epoch in tqdm(range(num_epochs)):
        for x1, x2, y in train_loader:
            # print(x1.shape, x2.shape)

            x1 = x1.to(device)
            x2 = x2.to(device)
            y = y.float().to(device)

            # print(x1.is_cuda)
            # print(x2.is_cuda)
            # print(y.is_cuda)
            optimizer.zero_grad()
            y_pred = model(x1, x2)

            # y_pred = torch.take_along_dim(y_pred, task)
            loss = criterion(y_pred, y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            scheduler.step()

    model.eval()
    with torch.no_grad():
        y_true_list = []
        y_pred_list = []
        pred_std_list = []
        for x1, x2, y_true in test_loader:
            x1 = x1.to(device)
            x2 = x2.to(device)

            y_pred, pred_std = model.predict(x1, x2, alpha=0.95)
            # print(pred_std)
            # y_pred = torch.take_along_dim(y_pred, task)
            y_pred_list.extend(y_pred.detach().flatten().cpu().tolist())
            y_true_list.extend(y_true.detach().cpu().tolist())
            pred_std_list.extend(pred_std.detach().cpu().tolist())
        # print(y_pred_list)
    print(np.array(y_true_list).shape, np.array(y_pred_list).shape)
    print(np.array(pred_std_list).shape)
    plot_prediction(y_true_list, y_pred_list, color="red")
    return y_true_list, y_pred_list, pred_std_list

and here is the output standard deviation:

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 …
 0.0]

@Takumi3 The Monte Carlo approach sounds interesting. Can you share a link to the paper/blog, etc.?

In your code, I can see an issue in the predict function:

samples_ = []
for i in range(samples):
    pred = self.forward(x1, x2).detach()
    samples_ += pred

Although you are iterating over a range of 100, every one of those forward passes is deterministic, so all 100 samples are identical and their standard deviation is zero. The model is in eval() mode when predict is called, which disables the Dropout layer, and dropout is the only source of randomness here. For Monte Carlo Dropout, the dropout layers need to stay active at prediction time.
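A minimal sketch of a predict that does this, reusing device and the forward signature from your code above (the rest is just one way to write it):

def predict(self, x1, x2, alpha=0.95, samples=100):
    x1 = x1.to(device)
    x2 = x2.to(device)
    self.eval()
    # Re-enable only the dropout layers so each forward pass
    # samples a different sub-network (Monte Carlo Dropout).
    for m in self.modules():
        if isinstance(m, nn.Dropout):
            m.train()
    with torch.no_grad():
        preds = torch.stack(
            [self.forward(x1, x2) for _ in range(samples)], dim=0
        ).squeeze(-1)  # (samples, batch)
    pred_mean = preds.mean(dim=0)
    pred_std = preds.std(dim=0)  # non-zero once the passes differ
    return pred_mean, pred_std

Also consider moving the dropout inside share_block (for example after each SELU) instead of after the final linear layer; as written, the only dropout randomly zeroes the single output unit itself rather than sampling different hidden representations.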

Thanks for your help. My code now runs correctly.

I'll also share the blogs and paper I found about Monte Carlo Dropout and prediction/confidence intervals.

Blogs:
"What My Deep Model Doesn't Know…" (Yarin Gal's blog, Cambridge Machine Learning Group)
"Prediction Intervals for Deep Learning Neural Networks"
"Uncertainty in Neural Networks"

Paper:
"High-Quality Prediction Intervals for Deep Learning: A Distribution-Free, Ensembled Approach"


@Takumi3 Thanks for sharing your work. It helps keep the community vibrant!