Model does not learn after ternarization of weights contrary to the paper mentioned below

puranjay_mishra · January 18, 2022, 6:06am

Hello. I apologize if this not the right place to ask this but I don’t now any place better!

I’m implementing the ‘Ternary Weights Network’ paper by Fengfu Li , Bo Zhang and Bin Liu([1605.04711] Ternary Weight Networks).

I’m training a simple Covnet with linear layers on the MNIST dataset. Without ternarization, the exact same model converges with high accuracy, but after ternarization of the linear layers, the model predicts all the classes with equal probability(0.1 for all the classes).

What could be the reason for this?

Code which ternarizes the linear layers -

class TernarizeOp():
def init(self, model):
count_targets = 0
self.model = model
for m in model.modules():
if isinstance(m, nn.Linear):
count_targets += 1
self.ternarize_range = np.linspace(0, count_targets - 1, count_targets).astype(‘int’).tolist()
self.num_of_params = len(self.ternarize_range)
self.saved_params = []
self.target_modules = []

    for m in model.modules():
        if isinstance(m, nn.Linear):
            tmp = m.weight.data.clone()
            self.saved_params.append(tmp)  # tensor
            self.target_modules.append(m.weight)  # Parameter

def SaveWeights(self):
    for index in range(self.num_of_params):
        self.saved_params[index].copy_(self.target_modules[index].data)

def TernarizeWeights(self):
    alpha = []
    for index in range(self.num_of_params):
        output,alpha_tmp = self.Ternarize(self.target_modules[index].data)
        self.target_modules[index].data = output
        alpha.append(alpha_tmp)
    return alpha

def Ternarize(self, tensor):
    tensor = tensor.cuda()
    # print(tensor[0])

    output = torch.zeros(tensor.size()).type(torch.cuda.FloatTensor)

    new_tensor = tensor.abs()
    delta = torch.mul(0.75, torch.mean(new_tensor, dim=1))
    # print(delta[0])
    new_tensor = torch.t(new_tensor)

    t = torch.greater_equal(new_tensor,delta).type(torch.cuda.FloatTensor)
    # print(t[0])
    x = torch.greater(tensor,0).type(torch.cuda.FloatTensor)
    y = torch.less(tensor,0).type(torch.cuda.FloatTensor)
    y = torch.mul(y,-1)
    z = torch.add(x,y)
    t = torch.t(t)
    final = torch.mul(t,z)

    new_tensor = torch.t(new_tensor)

    final.cuda()
    alpha = torch.mean(torch.mul(final,new_tensor),dim=1)

    output = torch.add(output,final)

    # print(output[0])

    return (output,alpha)

The code which calls the Ternarizeop class in the training loop is as follows -

alpha = ternarize_op.TernarizeWeights()
l = []
l.append(imgs)
l.append(alpha)
output = model(l)