I am looking at the second problem of the homework solution from the course unsupervised learning here.
Can someone explain how the author converts this equation:
$p_\theta(x) = \sum_{i=1}^4 \pi_i[\sigma((x+0.5 - \mu_i)/s_i) - \sigma((x-0.5-\mu_i)/s_i)]$
into the following piece of PyTorch code? (I am having trouble understanding the `forward` function.)
The specific questions I have are:
- Why is `log_scales`, which represents $s_i$ in the equation, stored as a log value whose `exp` we have to calculate, instead of as a simple vector of 4 values?
- Why do we need to calculate `log_cdf_delta`, and why do we need to use `torch.clamp`?
- Why do we need to calculate `x_log_probs`, and why are we selecting different values based on `x < 0.001`?
class MixtureOfLogistics(nn.Module):
    """Discretized mixture of logistic distributions over the integers 0 .. d - 1.

    The probability of an integer ``x`` is the mass each logistic component puts
    on the bin ``[x - 0.5, x + 0.5]``, weighted by the mixture weights ``pi_i``:

        p(x) = sum_i pi_i * [sigmoid((x + 0.5 - mu_i) / s_i)
                             - sigmoid((x - 0.5 - mu_i) / s_i)]

    The first bin (x = 0) is extended to -inf and the last bin (x = d - 1) to
    +inf, so the probabilities of 0 .. d - 1 sum to one even though a logistic
    has support on the whole real line.

    Learned parameters:
        logits:     unnormalized mixture weights; a softmax turns them into pi_i.
        means:      component means mu_i.
        log_scales: log of the component scales s_i. Storing the log and
                    exponentiating keeps s_i strictly positive without any
                    constraint on the optimizer.

    Args:
        d: number of discrete values (the support is 0 .. d - 1).
        n_mix: number of mixture components.
    """

    def __init__(self, d, n_mix=4):
        super().__init__()
        self.d = d
        self.n_mix = n_mix

        self.logits = nn.Parameter(torch.zeros(n_mix))
        # Spread the initial means evenly over [0, d]. The max() guard avoids a
        # 0 / 0 (NaN mean) when there is a single component.
        self.means = nn.Parameter(
            torch.arange(n_mix).float() / max(n_mix - 1, 1) * d
        )
        self.log_scales = nn.Parameter(torch.randn(n_mix))

    def forward(self, x):
        """Return log p(x) for a 1-D batch of values.

        Args:
            x: 1-D tensor of length b holding values in 0 .. d - 1.

        Returns:
            Tensor of shape (b,) with the log-probability of each entry.
        """
        d = self.d
        # b x n_mix view (no copy); expand also rejects inputs that are not 1-D.
        x = x.float().unsqueeze(1).expand(-1, self.n_mix)
        means = self.means.unsqueeze(0)  # 1 x n_mix
        inv_scales = torch.exp(-self.log_scales).unsqueeze(0)  # 1 / s_i, 1 x n_mix

        # Interior bins: CDF(x + 0.5) - CDF(x - 0.5).
        cdf_plus = torch.sigmoid(inv_scales * (x + 0.5 - means))
        cdf_min = torch.sigmoid(inv_scales * (x - 0.5 - means))
        # The clamp keeps log() finite when the bin mass underflows to zero.
        log_cdf_delta = torch.log(torch.clamp(cdf_plus - cdf_min, min=1e-12))

        # Left edge bin (x = 0): all mass below 0.5, i.e. CDF(0.5).
        log_cdf_plus = torch.log(
            torch.clamp(torch.sigmoid(inv_scales * (0.5 - means)), min=1e-12)
        )
        # Right edge bin (x = d - 1): all mass above d - 1.5, i.e. 1 - CDF(d - 1.5).
        log_cdf_min = torch.log(
            torch.clamp(1 - torch.sigmoid(inv_scales * (d - 1.5 - means)), min=1e-12)
        )

        # x is a float tensor, so the edges are detected with a small tolerance
        # rather than with exact equality to 0 and d - 1.
        x_log_probs = torch.where(
            x < 0.001,
            log_cdf_plus,
            torch.where(x > d - 1 - 1e-3, log_cdf_min, log_cdf_delta),
        )

        # log pi_i + log p_i(x), then log-sum-exp over the components gives
        # log sum_i pi_i * p_i(x) without leaving log space.
        pi_log_probs = F.log_softmax(self.logits, dim=0).unsqueeze(0)
        return torch.logsumexp(x_log_probs + pi_log_probs, dim=1)

    def loss(self, x):
        """Return the mean negative log-likelihood of the batch ``x``."""
        return -torch.mean(self(x))

    def get_distribution(self):
        """Return p(x) for x = 0 .. d - 1 as a NumPy array of shape (d,)."""
        with torch.no_grad():
            # Build the support on the same device as the parameters so this
            # works on CPU as well as on GPU.
            x = torch.arange(
                self.d, dtype=torch.float32, device=self.logits.device
            )
            distribution = self(x).exp()
        return distribution.cpu().numpy()