I’m currently trying to implement deep neural decision forest, however, I met some problems.
It seem that the submodule’s parameters haven’t add to the model’s parametes. I wonder if it is because of the module list.
Here is my definition of the model:
`
class DeepNeuralDecisionForest(nn.Module):
def __init__(self, p_keep_conv, p_keep_hidden, n_leaf, n_label, n_tree, n_depth):
super(DeepNeuralDecisionForest, self).__init__()
self.conv = nn.Sequential()
self.conv.add_module('conv1', nn.Conv2d(1, 32, kernel_size=3, padding=1))
self.conv.add_module('relu1', nn.ReLU())
self.conv.add_module('pool1', nn.MaxPool2d(kernel_size=2))
self.conv.add_module('drop1', nn.Dropout(1 - p_keep_conv))
self.conv.add_module('conv2', nn.Conv2d(32, 64, kernel_size=3, padding=1))
self.conv.add_module('relu2', nn.ReLU())
self.conv.add_module('pool2', nn.MaxPool2d(kernel_size=2))
self.conv.add_module('drop2', nn.Dropout(1 - p_keep_conv))
self.conv.add_module('conv3', nn.Conv2d(64, 128, kernel_size=3, padding=1))
self.conv.add_module('relu3', nn.ReLU())
self.conv.add_module('pool3', nn.MaxPool2d(kernel_size=2))
self.conv.add_module('drop3', nn.Dropout(1 - p_keep_conv))
self._nleaf = n_leaf
self._nlabel = n_label
self._ntree = n_tree
self._ndepth = n_depth
self._batchsize = 100
self.treelayers = []
self.pi_e = []
for i in xrange(self._ntree):
treelayer = nn.Sequential()
treelayer.add_module('sub_linear1', nn.Linear(1152, 625))
treelayer.add_module('sub_relu', nn.ReLU())
treelayer.add_module('sub_drop1', nn.Dropout(1 - p_keep_hidden))
treelayer.add_module('sub_linear2', nn.Linear(625, self._nleaf))
treelayer.add_module('sub_sigmoid', nn.Sigmoid())
pi = Parameter(self.init_pi())
self.treelayers.append(treelayer)
self.pi_e.append(nn.Softmax()(pi))
def init_pi(self):
return torch.ones(self._nleaf, self._nlabel)/float(self._nlabel)
def init_weights(self, shape):
return torch.randn(shape) * 0.01
def init_prob_weights(self, shape, minval=-5, maxval=5):
return torch.Tensor(shape[0], shape[1]).uniform_(minval, maxval)
def compute_mu(self, flat_decision_p_e):
n_batch = self._batchsize
batch_0_indices = torch.range(0, n_batch * self._nleaf - 1, self._nleaf).unsqueeze(1).repeat(1, self._nleaf).long()
in_repeat = self._nleaf / 2
out_repeat = n_batch
batch_complement_indices = torch.LongTensor(
np.array([[0] * in_repeat, [n_batch * self._nleaf] * in_repeat] * out_repeat).reshape(n_batch, self._nleaf))
# First define the routing probabilistics d for root nodes
mu_e = []
indices_var = Variable((batch_0_indices + batch_complement_indices).view(-1))
indices_var = indices_var.cuda()
#indices_var = indices_var.typeas(flat_decision_p_e[0])
# iterate over each tree
for i, flat_decision_p in enumerate(flat_decision_p_e):
mu = torch.gather(flat_decision_p, 0, indices_var).view(n_batch, self._nleaf)
mu_e.append(mu)
# from the scond layer to the last layer, we make the decison nodes
for d in xrange(1, self._ndepth + 1):
indices = torch.range(2 ** d, 2 ** (d + 1) - 1) - 1
tile_indices = indices.unsqueeze(1).repeat(1, 2 ** (self._ndepth - d + 1)).view(1, -1)
batch_indices = batch_0_indices + tile_indices.repeat(n_batch, 1).long()
in_repeat = in_repeat / 2
out_repeat = out_repeat * 2
# Again define the indices that picks d and 1-d for the nodes
batch_complement_indices = torch.LongTensor(
np.array([[0] * in_repeat, [n_batch * self._nleaf] * in_repeat] * out_repeat).reshape(n_batch, self._nleaf))
mu_e_update = []
indices_var = Variable((batch_indices + batch_complement_indices).view(-1))
indices_var = indices_var.cuda()
for mu, flat_decision_p in zip(mu_e, flat_decision_p_e):
mu = torch.mul(mu, torch.gather(flat_decision_p, 0, indices_var).view(
n_batch, self._nleaf))
mu_e_update.append(mu)
mu_e = mu_e_update
return mu_e
def compute_py_x(self, mu_e):
py_x_e = []
n_batch = self._batchsize
for mu, leaf_p in zip(mu_e, self.pi_e):
py_x_tree = mu.unsqueeze(2).repeat(1, 1, self._nlabel).mul(leaf_p.unsqueeze(0).repeat(n_batch, 1, 1)).mean(1)
py_x_e.append(py_x_tree)
py_x_e = torch.cat(py_x_e, 1)
py_x = py_x_e.mean(1).squeeze()
return py_x
def forward(self, x):
feat = self.conv.forward(x)
feat = feat.view(-1, 1152)
self._batchsize = x.size(0)
#py_x = self.fc.forward(feat)
flat_decision_p_e = []
for i in xrange(len(self.treelayers)):
decision_p = self.treelayers[i].forward(feat)
decision_p_comp = 1 - decision_p
decision_p_pack = torch.cat((decision_p, decision_p_comp), 1)
flat_decision_p = decision_p_pack.view(-1)
flat_decision_p_e.append(flat_decision_p)
mu_e = self.compute_mu(flat_decision_p_e)
py_x = self.compute_py_x(mu_e)`
return py_x