class ConditionLayer(nn.Module):
def __init__(self, layer_template, layer_name):
super(ConditionLayer, self).__init__()
self.layer_template = layer_template
self.layer_name = layer_name
self.set_params(*self.layer_template)
def set_params(self, cout, cin=None, height= None, width = None):
self.cout = cout
self.cin = cin
if height is not None:
self.height = height
self.width = width
self.spatial = height * width
else:
self.spatial = None
self.M_o = nn.Parameter(torch.eye(self.cout)).cuda()
if self.cin is not None:
self.M_i = nn.Parameter(torch.eye(self.cin)).cuda()
if self.spatial is not None:
self.M_f = nn.Parameter(torch.eye(self.spatial)).cuda()
def forward(self,x):
x_ =x
if self.spatial is not None:
x = x.permute(2,3,0,1)
x = x.view(-1,self.cout,self.cin)
x = x.view(-1,self.cout*self.cin)
x = torch.matmul(self.M_f,x)
x = x.view(self.spatial,self.cout,self.cin)
x = x.view(self.height, self.width, self.cout, self.cin)
x = x.permute(2,3,0,1)
if self.cin is not None:
x = x.transpose(1,0).contiguous()
x = x.view(self.cin,-1)
x = torch.matmul(self.M_i,x)
if self.spatial is not None:
x = x.view(self.cin, self.cout, self.height, self.width)
else:
x = x.view(self.cin,self.cout)
x = x.transpose(1,0).contiguous()
x = x.view(self.cout, -1)
x = torch.matmul(self.M_o,x)
x = x.view(*x_.shape)
return x
class Preconditioner(nn.Module):
def __init__(self, model_template, optimizer=None, step_size=0.1,
learn_step_size=False, per_param_step_size=False,
scheduler=None,device=None):
super(Preconditioner, self).__init__()
self.model_template = model_template
self.init_parameters()
def init_parameters(self):
self.condition_layer_obj = {}
for key in self.model_template.keys():
self.condition_layer_obj[key] = ConditionLayer(self.model_template[key], key)
def forward(self,grad):
preconditioned_grad = OrderedDict()
for layer_name in grad.keys():
preconditioned_grad[layer_name] = self.condition_layer_obj[layer_name](grad[layer_name])
return preconditioned_grad
The above snippet creates a class Preconditioner (which inherits nn.Module), in the fucntion init_parameters, we create a dictionary, which stores objects of the class ConditionalLayer, where in we create trainable parameters self.M_o, self.M_i, self.M_f, but Preconditioner.named_parameters() returns an empty iterator, shouldnt it return the trainable params ?