Computation graph of setting weights

I need clarification on some code from the FastAI2 library.

This is the `WeightDropout` code as written in the FastAI2 library:

    class WeightDropout(Module):
        "A module that wraps another layer in which some weights will be replaced by 0 during training."

        def __init__(self, module, weight_p, layer_names='weight_hh_l0'):
            self.module,self.weight_p,self.layer_names = module,weight_p,L(layer_names)
            for layer in self.layer_names:
                #Makes a copy of the weights of the selected layers.
                w = getattr(self.module, layer)
                delattr(self.module, layer)
                self.register_parameter(f'{layer}_raw', nn.Parameter(w.data))
                setattr(self.module, layer, F.dropout(w.data, p=self.weight_p, training=False))
                if isinstance(self.module, (nn.RNNBase, nn.modules.rnn.RNNBase)):
                    self.module.flatten_parameters = self._do_nothing

        def _setweights(self):
            "Apply dropout to the raw weights."
            for layer in self.layer_names:
                raw_w = getattr(self, f'{layer}_raw')
                setattr(self.module, layer, F.dropout(raw_w.data, p=self.weight_p, training=self.training))

        def forward(self, *args):
            self._setweights()
            with warnings.catch_warnings():
                #To avoid the warning that comes because the weights aren't flattened.
                warnings.simplefilter("ignore")
                return self.module.forward(*args)

        def reset(self):
            for layer in self.layer_names:
                raw_w = getattr(self, f'{layer}_raw')
                setattr(self.module, layer, F.dropout(raw_w.data, p=self.weight_p, training=False))
            if hasattr(self.module, 'reset'): self.module.reset()

        def _do_nothing(self): pass
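
For context, this is roughly how I am using it, as a minimal sketch (the LSTM sizes and dropout probability below are just made-up example values, and it assumes the class above together with its fastai/fastcore imports):

    import torch
    import torch.nn as nn

    # Wrap an LSTM so dropout is applied to its hidden-to-hidden weights
    # ('weight_hh_l0') on every forward pass.
    lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
    wd_lstm = WeightDropout(lstm, weight_p=0.5)

    x = torch.randn(4, 16, 10)    # (batch, seq_len, input_size)
    out, (h, c) = wd_lstm(x)      # forward() calls _setweights() before the LSTM runs
    print(out.shape)              # torch.Size([4, 16, 20])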

The above code randomly drops weights in the weight matrix of the hidden layers. I am primarily interested in `_setweights`:

    def _setweights(self):
        "Apply dropout to the raw weights."
        for layer in self.layer_names:
            raw_w = getattr(self, f'{layer}_raw')
            setattr(self.module, layer, F.dropout(raw_w.data, p=self.weight_p, training=self.training))

My question is: is this operation of replacing the weights recorded in the gradient computation graph?

I don't think it will be recorded, since the `.data` attribute is used, which prevents Autograd from tracking this operation.
This is generally not recommended and the block should probably be wrapped in a with torch.no_grad() block. However, I’m not familiar with the detailed implementation, and you might get a better answer in the FastAI forum. :wink:
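
You can verify this quickly with a small standalone check (a minimal sketch using only PyTorch, not the fastai class): a tensor produced from `raw_w.data` carries no `grad_fn`, whereas dropout applied to the parameter itself stays in the graph.

    import torch
    import torch.nn.functional as F

    raw_w = torch.nn.Parameter(torch.randn(5, 5))

    # Using .data: the result is a detached tensor, so autograd ignores the op.
    w_detached = F.dropout(raw_w.data, p=0.5, training=True)
    print(w_detached.requires_grad, w_detached.grad_fn)   # False None

    # Without .data: the dropout op is recorded and gradients flow back to raw_w.
    w_tracked = F.dropout(raw_w, p=0.5, training=True)
    print(w_tracked.requires_grad, w_tracked.grad_fn)     # True <...Backward0 object ...>
    w_tracked.sum().backward()
    print(raw_w.grad is not None)                         # True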
