That issue got resolved, but now I have another doubt.
For the cascade architecture I have to set different learning rates for different regions of the network.
For example, the connection from the last hidden neuron to the output neuron needs a different learning rate than the connections from the input neurons to the output neuron — and this has to be set up dynamically as the network grows, which I am unable to do.
class Casper(torch.nn.Module):
    """Cascade-style network built from a growing list of Linear layers.

    Layers are appended externally (see the training loop): every layer
    except the last acts as a hidden unit whose ReLU output is concatenated
    onto the running feature vector; the final layer produces the sigmoid
    output. `d_in` is accepted for interface compatibility but the layer
    sizes are determined by the layers appended later.
    """

    def __init__(self, d_in):
        super(Casper, self).__init__()
        # Populated incrementally by the caller as the cascade grows.
        self.layers = torch.nn.ModuleList([])
        self.activation = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        if len(self.layers) == 1:
            # Single layer: plain logistic output on the raw inputs.
            logits = self.layers[0](x)
        else:
            # Each hidden layer sees the inputs plus all earlier hidden
            # outputs; grow the feature vector one column at a time.
            features = x
            for hidden in list(self.layers)[:-1]:
                features = torch.cat(
                    [features, self.activation(hidden(features))], 1)
            logits = self.layers[-1](features)
        return self.sigmoid(logits).squeeze()
net = Casper(input_neurons)
loss_func = torch.nn.BCELoss()
all_losses = []
lr_policy = {}

# Grow the cascade one unit per stage, 4 stages total.
for count in range(1, 5):
    # The new layer sees the raw inputs plus one feature per existing layer.
    net.layers.append(torch.nn.Linear(len(net.layers) + input_neurons, 1))
    if count == 1:
        optimiser = torch.optim.Rprop(net.layers[0].parameters(),
                                      lr=0.01, etas=(0.5, 1.2),
                                      step_sizes=(1e-06, 50))
    else:
        # Region-specific initial step sizes: the newest hidden unit trains
        # fast (0.2), the output layer slowly (0.005). NOTE(review): a torch
        # Parameter cannot be sliced into separate param groups — views such
        # as weight[:, :-1] / weight[:, -1:] are non-leaf tensors the
        # optimiser will reject. To give the last weight column its own rate,
        # either split the output Linear into two Linear modules (one over
        # x[:, :-1], one over x[:, -1:], summing their outputs) or register a
        # gradient hook on the weight that rescales columns.
        optimiser = torch.optim.Rprop(
            [{'params': net.layers[-2].parameters(), 'lr': 0.2},
             {'params': net.layers[-1].weight, 'lr': 0.005},
             {'params': net.layers[-1].bias, 'lr': 0.005}],
            lr=0.001, etas=(0.5, 1.2), step_sizes=(1e-06, 50))
    for epoch in range(num_epochs):
        Y_pred = net(X)
        loss = loss_func(Y_pred, Y.type(torch.FloatTensor))
        all_losses.append(loss.item())
        if epoch % 50 == 0:
            # BUG FIX: the original compared raw sigmoid probabilities to the
            # 0/1 labels (predicted = Y_pred), which essentially never matches
            # exactly, so accuracy was always ~0. Threshold at 0.5 first.
            predicted = (Y_pred > 0.5).type(torch.FloatTensor)
            total = predicted.size(0)
            correct = predicted.data.numpy() == Y.data.numpy()
            print('Epoch [%d/%d] Loss: %.4f Accuracy: %.2f %%'
                  % (epoch + 1, num_epochs, loss.item(),
                     100 * sum(correct) / total))
        net.zero_grad()
        loss.backward()
        optimiser.step()
Now, in the optimiser, for the param group {'params': net.layers[len(net.layers)-1].weight, 'lr': 0.005}
I need to split the weight matrix into weight[:, :-1] and weight[:, -1:] so that I can apply a different learning rate to each part — but how can I do that dynamically?
Please suggest.