Hello,
l have a strange problem l run a model with decay=0.95 and l get 75% after 3 epochs. now l would like to fine tune the hyperparameter decay. l run my model in a for loop with different decay values :
first strange thing is that with decay=0.95 my model performance decreases to 56% and for the rest of decay values it return 0% for train and 0 % for test.
However, when l lunch it with different values manually (one by one ) rather than in a for loop. The algorithm works well .
What is wrong with my code ?
when l run the following code (decay=0.95) l get 75% on validation during epoch 3
learning_rate = 0.0001
# dropout_value = 0.5
dropout_value = 0.2
l2_regularization = 5e-4
# batch_size = 100
batch_size = 4
num_epochs = 10
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // batch_size
print('num_epochs=', num_epochs, ', train_size=', train_size, ', nb_iter=', nb_iter)
# Optimizer
global_lr = learning_rate
global_step = 0
decay = 0.95
decay_steps = train_size
lr = learning_rate
optimizer = net.update(lr)
# loop over epochs
indices = collections.deque()
for epoch in range(num_epochs): # loop over the dataset multiple times
# reshuffle
np.random.seed(seed)
indices.extend(np.random.permutation(train_size)) # rand permutation
# reset time
t_start = time.time()
# extract batches
running_loss = 0.0
running_accuray = 0.0
running_total = 0.0
# import ipdb;ipdb.set_trace()
while len(indices) >= batch_size:
# print('len indices ',len(indices))
# print('batch_size ', batch_size)
# print('hello')
# print('batch_size ', batch_size)
# print('indices ', indices)
# extract batches
# import ipdb;ipdb.set_trace()
batch_idx = [indices.popleft() for i in range(batch_size)]
# print('batch idx', batch_idx)
# print('batch idx train ', batch_idx)
# import ipdb;ipdb.set_trace()
# print('here it ok')
# batch_idx = tuple(slice(x) for x in batch_idx)
# import ipdb;ipdb.set_trace()
# try:
train_x, train_y, coord_train, adj_train = train_data[batch_idx, :], train_labels[batch_idx], \
coord_train_xy[
batch_idx], train_adjacency_matrix[batch_idx]
# except:
# import ipdb;ipdb.set_trace()
# print('here it ok 2')
train_x = Variable(torch.FloatTensor(train_x).type(dtypeFloat), requires_grad=False)
# L_train = Variable(torch.FloatTensor(L_train).type(dtypeFloat), requires_grad=False)
# lmax_train = Variable(torch.FloatTensor(lmax_train).type(dtypeFloat), requires_grad=False)
# print('here it ok 3')
train_y = train_y.astype(np.int64)
# print('here it ok 4')
train_y = torch.LongTensor(train_y).type(dtypeLong)
# print('here it ok 5')
train_y = Variable(train_y, requires_grad=False)
# print('here it ok 6')
coord_train = coord_train.astype(np.int64)
coord_train = torch.LongTensor(coord_train).type(dtypeLong)
coord_train = Variable(coord_train, requires_grad=False)
# import ipdb;ipdb.set_trace()
# adj_train=adj_train.astype(np.int64)
# adj_train=Variable(torch.FloatTensor(adj_train).type(dtypeFloat), requires_grad=False)
# Forward
y = net.forward(train_x, dropout_value, L_train, lmax_train, coord_train, adj_train)
# import ipdb;ipdb.set_trace()
loss = net.loss(y, train_y, l2_regularization)
loss_train = loss.data[0]
# Accuracy
acc_train = net.evaluation(y, train_y.data)
# backward
loss.backward()
# Update
global_step += batch_size # to update learning rate
optimizer.step()
optimizer.zero_grad()
# loss, accuracy
running_loss += loss_train
running_accuray += acc_train
running_total += 1
# print
if not running_total % 100: # print every x mini-batches
print('epoch= %d, i= %4d, loss(batch)= %.4f, accuray(batch)= %.2f' % (
epoch + 1, running_total, loss_train, acc_train))
# print('helooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo')
# print
# import ipdb;ipdb.set_trace()
t_stop = time.time() - t_start
print('epoch= %d, loss(train)= %.3f, accuracy(train)= %.3f, time= %.3f, lr= %.5f' %
(epoch + 1, running_loss / running_total, running_accuray / running_total, t_stop, lr))
# update learning rate
lr = global_lr * pow(decay, float(global_step // decay_steps))
optimizer = net.update_learning_rate(optimizer, lr)
# Test set
running_accuray_test = 0
running_total_test = 0
indices_test = collections.deque()
indices_test.extend(range(test_data.shape[0]))
t_start_test = time.time()
while len(indices_test) >= batch_size:
batch_idx_test = [indices_test.popleft() for i in range(batch_size)]
# print('batch idx test ', batch_idx_test)
test_x, test_y, coord_test, adj_test = test_data[batch_idx_test, :], test_labels[batch_idx_test], \
coord_test_xy[
batch_idx_test], test_adjacency_matrix[batch_idx_test]
test_x = Variable(torch.FloatTensor(test_x).type(dtypeFloat), requires_grad=False)
coord_test = coord_test.astype(np.int64)
coord_test = torch.LongTensor(coord_test).type(dtypeLong)
coord_test = Variable(coord_test, requires_grad=False)
# adj_test=adj_test.astype(np.int64)
# adj_test=torch.LongTensor(adj_test).type(dtypeLong)
# adj_test=Variable(adj_test, requires_grad=False)
y = net.forward(test_x, 0.0, L_test, lmax_test, coord_test, adj_test)
test_y = test_y.astype(np.int64)
test_y = torch.LongTensor(test_y).type(dtypeLong)
test_y = Variable(test_y, requires_grad=False)
acc_test = net.evaluation(y, test_y.data)
running_accuray_test += acc_test
running_total_test += 1
t_stop_test = time.time() - t_start_test
print(' accuracy(test) = %.3f %%, time= %.3f' % (running_accuray_test / running_total_test, t_stop_test))
However when l run it with different decay values
decays=[0.95,0.90,0.85,0.80,0.75,0.70,0.65,0.60,0.55,0.50,0.45,0.40,0.35,0.30,0.25,0.20,0.15,0.10,0.05]
for decay in decays:
for epoch in range(num_epochs): # loop over the dataset multiple times
# reshuffle
np.random.seed(seed)
indices.extend(np.random.permutation(train_size)) # rand permutation
# reset time
t_start = time.time()
# extract batches
running_loss = 0.0
running_accuray = 0.0
running_total = 0.0
# import ipdb;ipdb.set_trace()
while len(indices) >= batch_size:
# print('len indices ',len(indices))
# print('batch_size ', batch_size)
# print('hello')
# print('batch_size ', batch_size)
# print('indices ', indices)
# extract batches
# import ipdb;ipdb.set_trace()
batch_idx = [indices.popleft() for i in range(batch_size)]
# print('batch idx', batch_idx)
# print('batch idx train ', batch_idx)
# import ipdb;ipdb.set_trace()
# print('here it ok')
# batch_idx = tuple(slice(x) for x in batch_idx)
# import ipdb;ipdb.set_trace()
# try:
train_x, train_y, coord_train, adj_train = train_data[batch_idx, :], train_labels[batch_idx], \
coord_train_xy[
batch_idx], train_adjacency_matrix[batch_idx]
# except:
# import ipdb;ipdb.set_trace()
# print('here it ok 2')
train_x = Variable(torch.FloatTensor(train_x).type(dtypeFloat), requires_grad=False)
# L_train = Variable(torch.FloatTensor(L_train).type(dtypeFloat), requires_grad=False)
# lmax_train = Variable(torch.FloatTensor(lmax_train).type(dtypeFloat), requires_grad=False)
# print('here it ok 3')
train_y = train_y.astype(np.int64)
# print('here it ok 4')
train_y = torch.LongTensor(train_y).type(dtypeLong)
# print('here it ok 5')
train_y = Variable(train_y, requires_grad=False)
# print('here it ok 6')
coord_train = coord_train.astype(np.int64)
coord_train = torch.LongTensor(coord_train).type(dtypeLong)
coord_train = Variable(coord_train, requires_grad=False)
# import ipdb;ipdb.set_trace()
# adj_train=adj_train.astype(np.int64)
# adj_train=Variable(torch.FloatTensor(adj_train).type(dtypeFloat), requires_grad=False)
# Forward
y = net.forward(train_x, dropout_value, L_train, lmax_train, coord_train, adj_train)
# import ipdb;ipdb.set_trace()
loss = net.loss(y, train_y, l2_regularization)
loss_train = loss.data[0]
# Accuracy
acc_train = net.evaluation(y, train_y.data)
# backward
loss.backward()
# Update
global_step += batch_size # to update learning rate
optimizer.step()
optimizer.zero_grad()
# loss, accuracy
running_loss += loss_train
running_accuray += acc_train
running_total += 1
# print
if not running_total % 100: # print every x mini-batches
print('epoch= %d, i= %4d, loss(batch)= %.4f, accuray(batch)= %.2f' % (
epoch + 1, running_total, loss_train, acc_train))
# print('helooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo')
# print
# import ipdb;ipdb.set_trace()
t_stop = time.time() - t_start
print('epoch= %d, loss(train)= %.3f, accuracy(train)= %.3f, time= %.3f, lr= %.5f' %
(epoch + 1, running_loss / running_total, running_accuray / running_total, t_stop, lr))
# update learning rate
lr = global_lr * pow(decay, float(global_step // decay_steps))
optimizer = net.update_learning_rate(optimizer, lr)
# Test set
running_accuray_test = 0
running_total_test = 0
indices_test = collections.deque()
indices_test.extend(range(test_data.shape[0]))
t_start_test = time.time()
while len(indices_test) >= batch_size:
batch_idx_test = [indices_test.popleft() for i in range(batch_size)]
# print('batch idx test ', batch_idx_test)
test_x, test_y, coord_test, adj_test = test_data[batch_idx_test, :], test_labels[batch_idx_test], \
coord_test_xy[
batch_idx_test], test_adjacency_matrix[batch_idx_test]
test_x = Variable(torch.FloatTensor(test_x).type(dtypeFloat), requires_grad=False)
coord_test = coord_test.astype(np.int64)
coord_test = torch.LongTensor(coord_test).type(dtypeLong)
coord_test = Variable(coord_test, requires_grad=False)
# adj_test=adj_test.astype(np.int64)
# adj_test=torch.LongTensor(adj_test).type(dtypeLong)
# adj_test=Variable(adj_test, requires_grad=False)
y = net.forward(test_x, 0.0, L_test, lmax_test, coord_test, adj_test)
test_y = test_y.astype(np.int64)
test_y = torch.LongTensor(test_y).type(dtypeLong)
test_y = Variable(test_y, requires_grad=False)
acc_test = net.evaluation(y, test_y.data)
running_accuray_test += acc_test
running_total_test += 1
t_stop_test = time.time() - t_start_test
print(' accuracy(test) = %.3f %%, time= %.3f' % (running_accuray_test / running_total_test, t_stop_test))
l get for decay=0.95 56% (rather tha 75%) why ?
and for all the rest of deacy values 0 accuracy for train and test ? !!!
What’s wrong is it due to my for loop on decay values ?