Regarding parameter scaling of a model and gradient on the updated parameters

I have made a one layer network. I have model’s linear.weight and linear.bias in a dictionary, and I have passed the dictionary to the new nn.module class I have made. I have to find the gradient of loss wrt theta which I have multiplied with the model’s parameter. But the error is coming like “One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior”. Please help me with this so that the computation graph is maintained.

Thanks in advance

class Net1(nn.Module):

def __init__(self):
    super(Net1, self).__init__()
    self.linear = nn.Linear(3072, 10)

def forward(self, x):
    out = x.view(x.size(0), -1)
    out = self.linear(out)
    return out

class lin(nn.Module):

def __init__(self, wt, bs, inp, output):
    super(lin, self).__init__()
    self.linear  = nn.Linear(inp, output)
    self.linear.weight = nn.parameter.Parameter(wt)
    self.linear.bias = nn.parameter.Parameter(bs)

def forward(self, input):
    out = input.view(input.size(0), -1)
    out = self.linear(out)
    return out

class Net_test(nn.Module):

def __init__(self,paramsel):
    super(Net_test, self).__init__()
    self.lnr = lin(paramsel['linear.weight'], paramsel['linear.bias'], 3072, 10)

def forward(self, x):
    print("Inside net test")
    out = x.view(x.size(0), -1)
    out = self.lnr(out) 
    return out

classf = Net1()
classf = classf.cuda()
paramNettest = {}
theta = torch.randn(1, requires_grad=True).cuda()

for i, data in enumerate(train_loader, 0):

inputs, labels = data
inputs, labels = inputs.cuda(), Variable(labels.cuda())

for name, param in classf.named_parameters(): 
    paramNettest[name] = param * theta

classft = Net_test(paramNettest)    
classft = classft.cuda()
op2 = classft(inputs)
loss =F.cross_entropy(op2,labels,reduction='mean')


@albanD @ptrblck any suggestions?