Certain class attributes are not recognized in forward()

Hello. I am experiencing a strange behaviour on the forward() method of nn.Module. I have a class that inherits nn.Module and initialize some attributes on the init() method. The forward() in this class does not recognize the attribute that is itself an nn.Module.

The context of this error is while trying to run an implementation for DSQ (paper: https://arxiv.org/pdf/1908.05033.pdf, code: https://github.com/ricky40403/DSQ). Either I get “torch.nn.modules.module.ModuleAttributeError: ‘DSQConv’ object has no attribute ‘running_lw’”, or in QuantConv I get “‘QuantMesure object has no attribute ‘quant’””.

Could you post a code snippet to reproduce this behavior, please?

Yes, of course. Thanks for the reply.

Using the code from a PyTorch implementation of DSQ (paper: https://arxiv.org/pdf/1908.05033.pdf, code: GitHub - ricky40403/DSQ: pytorch implementation of "Differentiable Soft Quantization: Bridging Full-Precision and Low-Bit Neural Networks"), the error comes from this class (trimmed some additional functions):

class DSQConv(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True,
                momentum = 0.1,                
                num_bit = 8, QInput = True, bSetQ = True):
        super(DSQConv, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
        self.num_bit = num_bit
        self.quan_input = QInput
        self.bit_range = 2**self.num_bit -1	 
        self.is_quan = bSetQ        
        self.momentum = momentum
        if self.is_quan:
            # using int32 max/min as init and backprogation to optimization
            # Weight
            self.uW = nn.Parameter(data = torch.tensor(2 **31 - 1).float())
            self.lW  = nn.Parameter(data = torch.tensor((-1) * (2**32)).float())
            self.register_buffer('running_uw', torch.tensor([self.uW.data])) # init with uw
            self.register_buffer('running_lw', torch.tensor([self.lW.data])) # init with lw
            self.alphaW = nn.Parameter(data = torch.tensor(0.2).float())
            # Bias
            if self.bias is not None:
                self.uB = nn.Parameter(data = torch.tensor(2 **31 - 1).float())
                self.lB  = nn.Parameter(data = torch.tensor((-1) * (2**32)).float())
                self.register_buffer('running_uB', torch.tensor([self.uB.data]))# init with ub
                self.register_buffer('running_lB', torch.tensor([self.lB.data]))# init with lb
                self.alphaB = nn.Parameter(data = torch.tensor(0.2).float())
              
            # Activation input		
            if self.quan_input:
                self.uA = nn.Parameter(data = torch.tensor(2 **31 - 1).float())
                self.lA  = nn.Parameter(data = torch.tensor((-1) * (2**32)).float())
                self.register_buffer('running_uA', torch.tensor([self.uA.data])) # init with uA
                self.register_buffer('running_lA', torch.tensor([self.lA.data])) # init with lA
                self.alphaA = nn.Parameter(data = torch.tensor(0.2).float())

    def forward(self, x):
        if self.is_quan:
            # Weight Part
            # moving average
            if self.training:
                cur_running_lw = self.running_lw.mul(1-self.momentum).add((self.momentum) * self.lW)
                cur_running_uw = self.running_uw.mul(1-self.momentum).add((self.momentum) * self.uW)
            else:
                cur_running_lw = self.running_lw
                cur_running_uw = self.running_uw

            Qweight = self.clipping(self.weight, cur_running_uw, cur_running_lw)
            cur_max = torch.max(Qweight)
            cur_min = torch.min(Qweight)
            delta =  (cur_max - cur_min)/(self.bit_range)
            interval = (Qweight - cur_min) //delta            
            mi = (interval + 0.5) * delta + cur_min
            Qweight = self.phi_function(Qweight, mi, self.alphaW, delta)
            Qweight = self.sgn(Qweight)
            Qweight = self.dequantize(Qweight, cur_min, delta, interval)

            Qbias = self.bias
            # Bias			
            if self.bias is not None:
                # self.running_lB.mul_(1-self.momentum).add_((self.momentum) * self.lB)
                # self.running_uB.mul_(1-self.momentum).add_((self.momentum) * self.uB)
                if self.training:
                    cur_running_lB = self.running_lB.mul(1-self.momentum).add((self.momentum) * self.lB)
                    cur_running_uB = self.running_uB.mul(1-self.momentum).add((self.momentum) * self.uB)
                else:
                    cur_running_lB = self.running_lB
                    cur_running_uB = self.running_uB

                Qbias = self.clipping(self.bias, cur_running_uB, cur_running_lB)
                cur_max = torch.max(Qbias)
                cur_min = torch.min(Qbias)
                delta =  (cur_max - cur_min)/(self.bit_range)
                interval = (Qbias - cur_min) //delta
                mi = (interval + 0.5) * delta + cur_min
                Qbias = self.phi_function(Qbias, mi, self.alphaB, delta)
                Qbias = self.sgn(Qbias)
                Qbias = self.dequantize(Qbias, cur_min, delta, interval)

            # Input(Activation)
            Qactivation = x
            if self.quan_input:
                                
                if self.training:                    
                    cur_running_lA = self.running_lA.mul(1-self.momentum).add((self.momentum) * self.lA)
                    cur_running_uA = self.running_uA.mul(1-self.momentum).add((self.momentum) * self.uA)
                else:
                    cur_running_lA = self.running_lA
                    cur_running_uA = self.running_uA
                    
                Qactivation = self.clipping(x, cur_running_uA, cur_running_lA)
                cur_max = torch.max(Qactivation)
                cur_min = torch.min(Qactivation)
                delta =  (cur_max - cur_min)/(self.bit_range)
                interval = (Qactivation - cur_min) //delta
                mi = (interval + 0.5) * delta + cur_min                
                Qactivation = self.phi_function(Qactivation, mi, self.alphaA, delta)
                Qactivation = self.sgn(Qactivation)
                Qactivation = self.dequantize(Qactivation, cur_min, delta, interval)
            
            output = F.conv2d(Qactivation, Qweight, Qbias,  self.stride, self.padding, self.dilation, self.groups)

        else:
            output =  F.conv2d(x, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

        return output

I get the following error message:

Traceback (most recent call last):
File “train.py”, line 579, in
main()
File “train.py”, line 137, in main
main_worker(args.gpu, ngpus_per_node, args)
File “train.py”, line 320, in main_worker
train(train_loader, model, criterion, optimizer, epoch, args)
File “train.py”, line 407, in train
output = model(images)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py”, line 153, in forward
return self.module(*inputs[0], **kwargs[0])
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torchvision/models/resnet.py”, line 220, in forward
return self._forward_impl(x)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torchvision/models/resnet.py”, line 203, in _forward_impl
x = self.conv1(x)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/afonso/Projects/quantization/DSQ/DSQConv.py”, line 88, in forward
cur_running_lw = self.running_lw.mul(1-self.momentum).add((self.momentum) * self.lW)
File “/home/afonso/anaconda3/envs/nlp/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 772, in getattr
type(self).name, name))
torch.nn.modules.module.ModuleAttributeError: ‘DSQConv’ object has no attribute ‘running_lw’

Additionally, from my implementation of a post-training uniform quantization for 3D models, adapted with some classes from that same DSQ project, for the following code:

class QuantConv3d(nn.Conv3d):
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True, num_bits=8, num_bits_weight=None, momentum=0.1):
        super(QuantConv3d, self).__init__(in_channels, out_channels, kernel_size,
                                          stride, padding, dilation, groups, bias)
        self.num_bits = num_bits
        self.num_bits_weight = num_bits_weight or num_bits
        self.quantize_input = QuantMeasure(num_bits=num_bits, momentum=momentum)

    def forward(self, input):
        input = self.quantize_input(input)
        qweight = quantize(self.weight, num_bits=self.num_bits_weight,
                           min_value=float(self.weight.min()),
                           max_value=float(self.weight.max()))
        if self.bias is not None:
            qbias = quantize(self.bias, num_bits=self.num_bits_weight)
        else:
            qbias = None

        output = F.conv3d(input, qweight, qbias, self.stride,
                          self.padding, self.dilation, self.groups)
        return output

I get the following error message:

Traceback (most recent call last):
File “main.py”, line 155, in
main()
File “main.py”, line 97, in main
opt.device)
File “/home/ctm/afonso/easyride/acceleration/src/core/trainer.py”, line 41, in train_epoch
outputs = model(inputs)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py”, line 153, in forward
return self.module(*inputs[0], **kwargs[0])
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/ctm/afonso/easyride/acceleration/src/models/mobilenetv2.py”, line 126, in forward
x = self.features(x)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/container.py”, line 117, in forward
input = module(input)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/container.py”, line 117, in forward
input = module(input)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “/home/ctm/afonso/easyride/acceleration/src/quantization/uni_quant_3d.py”, line 139, in forward
input = self.quantize_input(input)
File “/home/ctm/.conda/envs/3dcnn/lib/python3.7/site-packages/torch/nn/modules/module.py”, line 772, in getattr
type(self).name, name))
torch.nn.modules.module.ModuleAttributeError: ‘QuantConv3d’ object has no attribute ‘quantize_input’

Both implementations use the PyTransformer (https://github.com/ricky40403/PyTransformer/blob/master/transformers/torchTransformer.py) trans_layer method to permute standard conv layers to quantized ones. The code goes as follows:

def trans_layers(self, model, update = True):
		"""!
		This function transform layer by layers in register dictionaries
		@param model: input model to transfer
		@param update: default is True, whether to update the parameter from the original layer or not. 
		Note that it will update matched parameters only.
		@return transformed model
		"""
		# print("trans layer")
		if len(self._register_dict) == 0:
			print("No layer to swap")
			print("Please use register( {origin_layer}, {target_layer} ) to register layer")
			return model
		else:
			for module_name in model._modules:			
				# has children
				if len(model._modules[module_name]._modules) > 0:
					self.trans_layers(model._modules[module_name])
				else:
					if type(getattr(model, module_name)) in self._register_dict:
						# use inspect.signature to know args and kwargs of __init__
						_sig = inspect.signature(type(getattr(model, module_name)))
						_kwargs = {}
						for key in _sig.parameters:
							if _sig.parameters[key].default == inspect.Parameter.empty: #args 
								# assign args
								# default values should be handled more properly, unknown data type might be an issue
								if 'kernel' in key:
									# _sig.parameters[key].replace(default=inspect.Parameter.empty, annotation=3)
									value = 3
								elif 'channel' in key:
									# _sig.parameters[key].replace(default=inspect.Parameter.empty, annotation=32)
									value = 32
								else:
									# _sig.parameters[key].replace(default=inspect.Parameter.empty, annotation=None)
									value = None
						
								_kwargs[key] = value

						_attr_dict = getattr(model, module_name).__dict__
						_layer_new = self._register_dict[type(getattr(model, module_name))](**_kwargs) # only give positional args
						_layer_new.__dict__.update(_attr_dict)

						setattr(model, module_name, _layer_new)
		return model

Thank you for your help in advance.