I have a backward function that is supposed to prune some parameters by multiplying the weight matrix with a binary matrix. Somehow the model never gets changed; it always remains the same. Why might that be? I have checked: the multiplied tensors have identical dimensions, the model is set to training mode, the keys exist in the model's state dictionary, and the types are compatible. I've tried setting the weight tensor with setattr, setting it directly, and using the state dictionary, but the model remains the same.
# Mask the cached weights of the previous layer with the (presumably 0/1)
# matrix difference_change — TODO confirm difference_change is binary here.
weight_value = self.prev_layer_weights[name]*difference_change
# Overwrite the corresponding entry in the state-dict copy before reloading it.
statesdict[prev_layer]= weight_value
# NOTE(review): load_state_dict does copy these tensors back into the model.
# If the model "never changes", check how old vs. new parameters are compared:
# model.parameters() is a lazy generator over the *live* tensors, so a snapshot
# taken that way will always compare equal to the updated model.
self.model.load_state_dict(statesdict)
Best wishes and thanks a lot for any help
PS: I kept the code short for easier understanding, but in case anyone is curious, this is the whole function:
def backward(self):
    """Prune the model's weights layer by layer.

    For every layer with a recorded activation, builds a binary keep-mask
    from the activation change (entries above a quantile threshold are
    zeroed), multiplies the cached previous-layer weights by that mask, and
    writes the result back into the model both in-place and via the state
    dict. Returns ``self.default_loss`` early when no activations changed.

    Side effects: mutates ``self.model``'s weights, zeroes gradients,
    shows a saliency map, calls ``self.measure_impact()`` and resets
    ``self.marked_pixels``.
    """
    assert self.model.training, "Model is not in training mode"

    # BUG FIX: .parameters() returns a lazy generator over the *live*
    # parameter tensors.  Without cloning, the "old" parameters seen by
    # calculate_parameter_change below are the already-updated tensors,
    # so the model always looks unchanged.  Snapshot real copies instead.
    old_parameters = [p.detach().clone() for p in self.model.parameters()]

    # Nothing to prune: clear any stale gradients on the fallback loss.
    if self.changed_activations == {}:
        self.default_loss.zero_grad()
        return self.default_loss

    statesdict = self.model.state_dict()
    prev_layer = None
    # The ("output", None) sentinel lets the loop process the final real
    # layer's weights (prev_layer lags one step behind name).
    for name, layer in list(self.model.named_children()) + [("output", None)]:
        if name not in self.activations.keys():
            # Guard: the sentinel entry has layer=None.
            if layer is not None:
                layer.zero_grad()
            prev_layer = name + ".weight"
            print(prev_layer)
            continue

        # Magnitude of the activation change, broadcast against the
        # previous layer's weight matrix.
        difference_change = abs(
            (self.activations[name] - self.changed_activations[name])
            .squeeze(0)
            .unsqueeze(1)
            * self.prev_layer_weights[name]
        )
        percentile = (self.marked_pixels_count * 3) / self.input.numel()
        limit = torch.quantile(difference_change, percentile).item()

        # Binary keep-mask: entries *above* the quantile are dropped (0),
        # remaining non-zero entries are kept (1).
        # NOTE(review): this keeps the small changes and prunes the large
        # ones — confirm that is the intended direction.
        difference_change[difference_change > limit] = 0
        difference_change[difference_change > 0] = 1

        num_zeros = torch.sum(difference_change == 0).item()
        num_ones = torch.sum(difference_change == 1).item()
        print(f"Number of 0s: {num_zeros}")
        print(f"Number of 1s: {num_ones}")

        weight_value = self.prev_layer_weights[name] * difference_change
        print(f"shape is {self.prev_layer_weights[name].shape} or {difference_change.shape}")
        statesdict[prev_layer] = weight_value

        # BUG FIX: rstrip(".weight") strips *characters* from the set
        # {'.','w','e','i','g','h','t'} off the right end, mangling any
        # layer name ending in those letters.  Slice the suffix instead.
        module_name = prev_layer[: -len(".weight")]
        old_stuff = getattr(self.model, module_name)
        with torch.no_grad():
            old_stuff.weight.copy_(weight_value)

        print(f"i am trying to change {prev_layer} by {num_zeros} zero entries")
        # Explicit None check instead of a bare `except: pass`, which only
        # existed to swallow the sentinel's AttributeError.
        if layer is not None:
            layer.zero_grad()
        prev_layer = name + ".weight"

    self.model.load_state_dict(statesdict)
    new_parameters = self.model.parameters()
    print(f"The difference between the two models is {self.calculate_parameter_change(old_params=old_parameters,new_params=new_parameters)}")
    self.model.zero_grad()
    self.compute_saliency_map(self.input, self.label).show()
    self.measure_impact()
    self.marked_pixels = None