Hello all, I'm getting an error when using two ReduceLROnPlateau schedulers to automate the learning-rate changes for two optimizers. One of the schedulers raises an error, and I have no clue why. Can you help? Thanks.
import torch.nn as nn
import torch
import numpy as np
import torchvision
from torch.autograd import Variable, Function
from torch.optim.lr_scheduler import ReduceLROnPlateau
train_data = torchvision.datasets.MNIST(
    root='/home/data/input/mnist',
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# hyper parameters
batch_size = 100 # batch size of images
img_h = 28
Dic_size = 50
ld = 4 # sparse penalty
lr = 0.0001 # learning rate
mom = 0.2
EPOCH = 100
# depends on size of the dictionary, number of atoms.
D = Variable(torch.from_numpy(np.random.normal(0,1,(Dic_size,img_h,img_h))).type(torch.FloatTensor), requires_grad=True)
# hx sparse representation
ht = Variable(torch.from_numpy(np.random.normal(0,1,(batch_size,Dic_size,1,1))).type(torch.FloatTensor), requires_grad=True)
# Dictionary loss function
def loss_Dictionary(x, D, ht):
    holder = []
    for i in range(len(ht)):
        holder.append(0.5*torch.norm((x[i]-(D*ht[i]).sum(dim=0)), p=2)**2)
    return torch.mean(torch.stack(holder))
# customized shrink function to update gradient
shrink_ht = lambda x: torch.stack([torch.sign(i)*torch.max(torch.abs(i)-lr*ld,0)[0] for i in x])
optimizer_ht = torch.optim.RMSprop([ht], lr=lr)
optimizer_D = torch.optim.RMSprop([D], lr=lr)
scheduler_ht = ReduceLROnPlateau(optimizer_ht,'min')
scheduler_D = ReduceLROnPlateau(optimizer_D,'min')
## random selection of sample indices from the training set
np.random.seed(0)
sample_holder = []
sampler = np.random.choice(60000,100,replace=False)
for sam_id in sampler:
    sample_holder.append(train_data[sam_id][0][0])
train_batch = torch.stack(sample_holder, dim=0).view(-1,28,28)
x = Variable(train_batch.view(-1,28,28))
## optimization step
for i in range(EPOCH):
    ## per-image update of the sparse representation
    holder_loss_ht = []  # to hold the sparse representation loss values
    for idx in range(len(x)):
        optimizer_ht.zero_grad()  # clear up gradients
        loss_ht = 0.5*torch.norm((x[idx]-(D*ht[idx]).sum(dim=0)), p=2)**2
        loss_ht.backward()  # back propagation and calculate gradients
        optimizer_ht.step()  # update parameters with gradients
        ht.data[idx] = shrink_ht(ht.data[idx])  # customized shrink function; updates the data (ht.data) only
        holder_loss_ht.append(loss_ht.data)
    val_ht_loss = torch.mean(torch.stack(holder_loss_ht))
    scheduler_ht.step(val_ht_loss)
    print('Epoc: ', i, ' Sparse loss: ', val_ht_loss)
    ## batch update of the dictionary from the reconstruction loss
    optimizer_D.zero_grad()  # clear up gradients
    loss_D = loss_Dictionary(x, D, ht)  # x and ht are batches
    loss_D.backward()  # back propagation and calculate gradients
    optimizer_D.step()  # update parameters with gradients
    scheduler_D.step(loss_D)
    ## normalize it to unit norm
    for ii in range(len(D)):
        D.data[ii,:,:] = D.data[ii,:,:]/torch.norm(D.data[ii,:,:], p=2)  # atom normalization
        #D.data[:,ii,ii] = D.data[:,ii,ii]/torch.norm(D.data[:,ii,ii], p=2)  # column normalization - cross features
    print('Epoc: ', i, ' Dictionary loss: ', loss_D)
I get the error on scheduler_D.step(loss_D), as shown below:
Epoc: 0 Sparse loss: 1.592491530776024
RuntimeError                              Traceback (most recent call last)
in <module>()
     27     loss_D.backward() # back propagation and calculate gradients
     28     optimizer_D.step() # update parameters with gradients
---> 29     scheduler_D.step(loss_D)
     30     ## normalize it to unit norm
     31     for ii in range(len(D)):

/home/miniconda3/lib/python3.6/site-packages/torch/optim/lr_scheduler.py in step(self, metrics, epoch)
    294         self.last_epoch = epoch
    295
--> 296         if self.is_better(current, self.best):
    297             self.best = current
    298             self.num_bad_epochs = 0

/home/miniconda3/lib/python3.6/site-packages/torch/optim/lr_scheduler.py in <lambda>(a, best)
    330         if mode == 'min' and threshold_mode == 'rel':
    331             rel_epsilon = 1. - threshold
--> 332             self.is_better = lambda a, best: a < best * rel_epsilon
    333             self.mode_worse = float('Inf')
    334         elif mode == 'min' and threshold_mode == 'abs':

RuntimeError: value cannot be converted to type float without overflow: inf
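One thing I notice while staring at this: scheduler_ht.step() receives val_ht_loss, which is a plain tensor built from loss_ht.data, while scheduler_D.step() receives the Variable loss_D straight from the graph. My guess (just an assumption, not verified) is that passing the Variable is what trips the comparison against float('Inf') inside ReduceLROnPlateau, so I was thinking of handing the scheduler a plain Python number instead, roughly like this:

    optimizer_D.step()               # update parameters with gradients
    loss_D_value = loss_D.data[0]    # untested guess: extract a plain Python number (on newer PyTorch, loss_D.item())
    scheduler_D.step(loss_D_value)   # pass the number, not the Variable, to the scheduler

Is that the right direction, or have I set up the two schedulers incorrectly in some other way?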