Hello. I apologize if this not the right place to ask this but I don’t now any place better!
I’m implementing the ‘Ternary Weights Network’ paper by Fengfu Li , Bo Zhang and Bin Liu([1605.04711] Ternary Weight Networks).
I’m training a simple Covnet with linear layers on the MNIST dataset. Without ternarization, the exact same model converges with high accuracy, but after ternarization of the linear layers, the model predicts all the classes with equal probability(0.1 for all the classes).
What could be the reason for this?
Code which ternarizes the linear layers -
class TernarizeOp():
def init(self, model):
count_targets = 0
self.model = model
for m in model.modules():
if isinstance(m, nn.Linear):
count_targets += 1
self.ternarize_range = np.linspace(0, count_targets - 1, count_targets).astype(‘int’).tolist()
self.num_of_params = len(self.ternarize_range)
self.saved_params = []
self.target_modules = []
for m in model.modules():
if isinstance(m, nn.Linear):
tmp = m.weight.data.clone()
self.saved_params.append(tmp) # tensor
self.target_modules.append(m.weight) # Parameter
def SaveWeights(self):
for index in range(self.num_of_params):
self.saved_params[index].copy_(self.target_modules[index].data)
def TernarizeWeights(self):
alpha = []
for index in range(self.num_of_params):
output,alpha_tmp = self.Ternarize(self.target_modules[index].data)
self.target_modules[index].data = output
alpha.append(alpha_tmp)
return alpha
def Ternarize(self, tensor):
tensor = tensor.cuda()
# print(tensor[0])
output = torch.zeros(tensor.size()).type(torch.cuda.FloatTensor)
new_tensor = tensor.abs()
delta = torch.mul(0.75, torch.mean(new_tensor, dim=1))
# print(delta[0])
new_tensor = torch.t(new_tensor)
t = torch.greater_equal(new_tensor,delta).type(torch.cuda.FloatTensor)
# print(t[0])
x = torch.greater(tensor,0).type(torch.cuda.FloatTensor)
y = torch.less(tensor,0).type(torch.cuda.FloatTensor)
y = torch.mul(y,-1)
z = torch.add(x,y)
t = torch.t(t)
final = torch.mul(t,z)
new_tensor = torch.t(new_tensor)
final.cuda()
alpha = torch.mean(torch.mul(final,new_tensor),dim=1)
output = torch.add(output,final)
# print(output[0])
return (output,alpha)
The code which calls the Ternarizeop class in the training loop is as follows -
alpha = ternarize_op.TernarizeWeights()
l = []
l.append(imgs)
l.append(alpha)
output = model(l)