Hi. I am trying to get standard deviation of predict value to describe predict interval using Monte Carlo Dropout. I can run my code without any problems. However I got standard deviation which all values are zero. I think there are problems in dataset or algorithm. So give me some advices to fix my code.
My code is here.
class MyNet(nn.Module):
def __init__(self, x1, x2, p, m, mode=0):
super().__init__()
input_dim = p*m
hidden_dim = 128
self.dropout = nn.Dropout(p=0.1)
if mode == 1:
self.embed = Encoder1(x1, x2, p)
elif mode == 2:
self.embed = Encoder2(x1, x2, p)
else:
self.embed = Encoder(x1, x2, p)
self.share_block = nn.Sequential(
# nn.BatchNorm1d(input_dim),
nn.utils.weight_norm(nn.Linear(input_dim, hidden_dim)),
# nn.Dropout(p=0.1),
nn.SELU(),
# nn.BatchNorm1d(hidden_dim),
nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
# nn.Dropout(p=0.1),
nn.SELU(),
# nn.BatchNorm1d(hidden_dim),
nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
# nn.Dropout(p=0.1),
nn.SELU(),
# nn.BatchNorm1d(hidden_dim),
nn.utils.weight_norm(nn.Linear(hidden_dim, hidden_dim)),
# nn.Dropout(p=0.1),
nn.SELU()
)
self.head = nn.Sequential(
# nn.BatchNorm1d(hidden_dim),
nn.utils.weight_norm(nn.Linear(hidden_dim, 1))
)
def forward(self, x1, x2):
x1 = x1.to(device)
x2 = x2.to(device)
x = self.embed(x1, x2)
x = self.share_block(x)
_out = self.head(x)
_out = self.dropout(_out)
return _out
def predict(self, x1, x2, alpha, samples=100):
x1 = x1.to(device)
x2 = x2.to(device)
# z_c = norm.ppf(alpha)
samples_ = []
for i in range(samples):
pred = self.forward(x1, x2).detach()
samples_ += pred
# print(len(samples_))
pred_sample = torch.cat(samples_, dim=-1)
pred_sample = torch.reshape(pred_sample, (100, -1))
# print(pred_sample.shape)
pred_mean = torch.mean(pred_sample, dim=0)
print(pred_mean)
pred_std = np.sqrt(torch.var(pred_sample, dim=0))
return pred_mean, pred_std
def train_and_test_gpu(model, num_epochs, dataset):
# model.cuda()
print(model)
train_dataset = utils.data.Subset(dataset, train_idx)
train_loader = utils.data.DataLoader(
train_dataset, batch_size=64, worker_init_fn=seed_worker, generator=g, shuffle=True)
test_dataset = utils.data.Subset(dataset, test_idx)
test_loader = utils.data.DataLoader(
test_dataset, batch_size=64, shuffle=False)
optimizer = optim.Adam(model.parameters(), lr=1e-6)
criterion = nn.MSELoss()
# このschedulerはかなり特殊でscheduler.step()を呼び出す位置も違うので注意
scheduler = optim.lr_scheduler.OneCycleLR(
optimizer,
max_lr=1e-3,
steps_per_epoch=len(train_loader),
epochs=num_epochs,
)
model.train()
for epoch in tqdm(range(num_epochs)):
for x1, x2, y in train_loader:
# print(x1.shape, x2.shape)
x1 = x1.to(device)
x2 = x2.to(device)
y = torch.Tensor(y).to(device)
# print(x1.is_cuda)
# print(x2.is_cuda)
# print(y.is_cuda)
optimizer.zero_grad()
y_pred = model(x1, x2)
# y_pred = torch.take_along_dim(y_pred, task)
loss = criterion(y_pred, y.unsqueeze(1))
loss.backward()
optimizer.step()
scheduler.step()
model.eval()
with torch.no_grad():
y_true_list = []
y_pred_list = []
pred_std_list = []
for x1, x2, y_true in test_loader:
x1 = torch.Tensor(x1).to(device)
x2 = torch.Tensor(x2).to(device)
y_pred, pred_std = model.predict(x1, x2, alpha=0.95)
# print(pred_std)
# y_pred = torch.take_along_dim(y_pred, task)
y_pred_list.extend(y_pred.detach().flatten().cpu().tolist())
y_true_list.extend(y_true.detach().cpu().tolist())
pred_std_list.extend(pred_std.detach().cpu().tolist())
# print(y_pred_list)
print(np.array(y_true_list).shape, np.array(y_pred_list).shape)
print(np.array(pred_std_list).shape)
plot_prediction(y_true_list, y_pred_list, color="red")
return y_true_list, y_pred_list, pred_std_list
and output standard deviation is here.
[0.0,
0.0,
0.0,
0.0,
0.0,
・・・
0.0]