Hi PyTorch Friends,
I’m trying to build customized layers by following the Extending PyTorch tutorial, and to use them to replace the nn.Conv2d and nn.Linear layers in the official MNIST example (main.py, lines 55-59).
However, after swapping in my customized layers, the testing step (forward pass) runs without error, but training the new model fails with “ValueError: optimizer got an empty parameter list”. Consistently, new_model.parameters() yields no items.
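Concretely, the failure shows up at the optimizer construction step taken from the MNIST example (lr and momentum here are just that example’s defaults):

import torch.optim as optim

new_model = Decomp_Net()
# Raises: ValueError: optimizer got an empty parameter list
optimizer = optim.SGD(new_model.parameters(), lr=0.01, momentum=0.5)
# The underlying symptom: nothing is registered on the module
print(list(new_model.parameters()))  # prints []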
The following is my modified Net (an nn.Module):
import torch
import torch.nn as nn
import torch.nn.functional as F

import create_dic_fuc  # my helper that builds the dictionary/coefficient pair

class Decomp_Net(nn.Module):
    def __init__(self, path_pretrained_model="mymodel.pth"):
        super(Decomp_Net, self).__init__()
        # Load the saved weights of the pretrained model
        self.path_pretrained_model = path_pretrained_model
        try:
            params = torch.load(self.path_pretrained_model)
            print("Loaded pretrained model.")
        except IOError:
            raise IOError("No pretrained model saved.")
        # The state dict is an OrderedDict; take its (name, tensor) pairs in order
        items = list(params.items())
        # Conv Layer 1
        self.W_conv1 = items[0][1].view(10, 25).t()
        self.B_conv1 = items[1][1]
        self.D_conv1, self.X_a_conv1 = create_dic_fuc.create_dic(
            A=self.W_conv1, M=25, N=10, Lmax=9, Epsilon=0.7, mode=1)
        # Conv Layer 2
        self.W_conv2 = items[2][1].view(200, 25).t()
        self.B_conv2 = items[3][1]
        self.D_conv2, self.X_a_conv2 = create_dic_fuc.create_dic(
            A=self.W_conv2, M=25, N=200, Lmax=199, Epsilon=0.7, mode=1)
        # Layer FC1
        self.W_fc1 = items[4][1]
        self.B_fc1 = items[5][1]
        self.D_fc1, self.X_a_fc1 = create_dic_fuc.create_dic(
            A=self.W_fc1, M=50, N=320, Lmax=319, Epsilon=0.8, mode=1)
        # Layer FC2 (the last fully connected layer of the original model)
        self.W_fc2 = items[6][1]
        self.B_fc2 = items[7][1]
        self.D_fc2, self.X_a_fc2 = create_dic_fuc.create_dic(
            A=self.W_fc2, M=10, N=50, Lmax=49, Epsilon=0.5, mode=1)
        self.conv1 = ConvDecomp2d(coefs=self.X_a_conv1, dictionary=self.D_conv1,
                                  bias_val=self.B_conv1, input_channels=1,
                                  output_channels=10, kernel_size=5, bias=True)
        self.conv2 = ConvDecomp2d(coefs=self.X_a_conv2, dictionary=self.D_conv2,
                                  bias_val=self.B_conv2, input_channels=10,
                                  output_channels=20, kernel_size=5, bias=True)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = FCDecomp(coefs=self.X_a_fc1, dictionary=self.D_fc1,
                            bias_val=self.B_fc1, input_features=320, output_features=50)
        self.fc2 = FCDecomp(coefs=self.X_a_fc2, dictionary=self.D_fc2,
                            bias_val=self.B_fc2, input_features=50, output_features=10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
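For reference, the test-time forward pass itself runs fine; a minimal sketch with a dummy MNIST-sized batch (batch size 64 is arbitrary):

model = Decomp_Net().cuda()
x = torch.randn(64, 1, 28, 28).cuda()  # dummy MNIST-shaped input
out = model(x)  # forward works without error
print(out.size())  # torch.Size([64, 10])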
I defined the customized autograd function as follows:
from torch.autograd import Function

class LinearDecomp(Function):
    # Note that both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input, coefs, dictionary, bias=None):
        # Reconstruct the weight from the dictionary and the coefficients
        weight = torch.mm(dictionary, coefs).cuda()
        ctx.save_for_backward(input, weight, dictionary, coefs, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        # Unpack in the same order the tensors were saved in forward
        input, weight, dictionary, coefs, bias = ctx.saved_tensors
        grad_input = grad_coefs = grad_dictionary = grad_bias = None
        # Gradient w.r.t. the reconstructed weight; an intermediate only,
        # it is never returned from backward
        grad_weight = grad_output.t().mm(input)
        # needs_input_grad indexes the forward inputs:
        # 0=input, 1=coefs, 2=dictionary, 3=bias
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
        if ctx.needs_input_grad[1]:
            grad_coefs = dictionary.t().mm(grad_weight)
        if ctx.needs_input_grad[2]:
            grad_dictionary = grad_weight.mm(coefs.t())
        if bias is not None and ctx.needs_input_grad[3]:
            grad_bias = grad_output.sum(0).squeeze(0)
        return grad_input, grad_coefs, grad_dictionary, grad_bias
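To sanity-check the backward formulas, I run torch.autograd.gradcheck on small random tensors (a minimal sketch; the sizes are made up, and everything is placed on the GPU since forward calls .cuda()):

from torch.autograd import gradcheck

# Made-up sizes: batch 4, 3 dictionary atoms, 6 input features, 5 output features
input = torch.randn(4, 6, dtype=torch.double, device="cuda", requires_grad=True)
coefs = torch.randn(3, 6, dtype=torch.double, device="cuda", requires_grad=True)
dictionary = torch.randn(5, 3, dtype=torch.double, device="cuda")  # fixed, no grad
bias = torch.randn(5, dtype=torch.double, device="cuda", requires_grad=True)

# Compares the analytic backward above against numerical gradients
print(gradcheck(LinearDecomp.apply, (input, coefs, dictionary, bias),
                eps=1e-6, atol=1e-4))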
The customized layer is defined as:
class FCDecomp(nn.Module):
    def __init__(self, coefs, dictionary, bias_val, input_features, output_features, bias=True):
        super(FCDecomp, self).__init__()
        self.dictionary = nn.Parameter(dictionary, requires_grad=False).cuda()
        self.coefs = nn.Parameter(coefs, requires_grad=True).cuda()
        if bias:
            self.bias = nn.Parameter(bias_val, requires_grad=True).cuda()
        else:
            self.register_parameter('bias', None)

    def forward(self, input):
        return LinearDecomp.apply(input, self.coefs, self.dictionary, self.bias)
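For debugging, this is how I list what the module actually registers; in my case nothing prints (X_a, D, and b below are placeholders for real tensors):

layer = FCDecomp(coefs=X_a, dictionary=D, bias_val=b,
                 input_features=320, output_features=50)
for name, p in layer.named_parameters():
    print(name, p.size())  # expected coefs, dictionary, bias; the loop body never runs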
Could anyone offer some suggestions or hints on this issue? Thank you very much!