I want to create mask from score threshold and multiply it with weight matrix.
To create mask I’m using condition shown in code. but for all score parameters grad are zero.
How to compute gradient for score parameters ?
class SuperMaskMLP(torch.nn.Module):
def __init__(self,num_features,hidden_nodes_list,num_classes):
super(SuperMaskMLP, self).__init__()
num_hidden_layes = len(hidden_nodes_list)
self.hidden = torch.nn.ModuleList()
self.score = torch.nn.ModuleList()
if num_hidden_layes==0:
self.hidden.append(torch.nn.Linear(num_features, num_classes))
self.score.append(torch.nn.Linear(num_features, num_classes))
else:
self.hidden.append(torch.nn.Linear(num_features, hidden_nodes_list[0]))
self.score.append(torch.nn.Linear(num_features, hidden_nodes_list[0]))
for k in range(num_hidden_layes-1):
self.hidden.append(torch.nn.Linear(hidden_nodes_list[k], hidden_nodes_list[k+1]))
self.score.append(torch.nn.Linear(hidden_nodes_list[k], hidden_nodes_list[k+1]))
self.hidden.append(torch.nn.Linear(hidden_nodes_list[num_hidden_layes-1], num_classes))
self.score.append(torch.nn.Linear(hidden_nodes_list[num_hidden_layes-1], num_classes))
# input : features
# output: logits , probabilities
def forward(self, x):
out = x
i=0
for layer in self.hidden[:-1]:
wmask = self.score[i].weight.clone()
wmask[self.score[i].weight >= (self.score[i].weight.mean())] = 1
wmask[self.score[i].weight < (self.score[i].weight.mean())] = 0
bmask = self.score[i].bias.clone()
bmask[self.score[i].bias >= (self.score[i].bias.mean())] = 1
bmask[self.score[i].bias < (self.score[i].bias.mean())] = 0
w = layer.weight * wmask
b = layer.bias * bmask
out = torch.matmul(out,torch.t(w)) + (b)
out = F.relu(out)
i+=1
wmask = self.score[i].weight.clone()
wmask[self.score[i].weight >= (self.score[i].weight.mean())] = 1
wmask[self.score[i].weight < (self.score[i].weight.mean())] = 0
bmask = self.score[i].bias.clone()
bmask[self.score[i].bias >= (self.score[i].bias.mean())] = 1
bmask[self.score[i].bias < (self.score[i].bias.mean())] = 0
w = self.hidden[-1].weight *wmask
b = self.hidden[-1].bias * bmask
logits = torch.matmul(out,torch.t(w)) + (b)
probas = F.log_softmax(logits, dim=1)
return 1,logits, probas