With regards to the thread topic, could anyone help to advise what is wrong with the loss_function() computation logic (especially policy_output_discrete
and value_output_discrete
) in my NN ?
# Forward Pass
policy_output, value_output = net(_board_features_and_turn)
# Since both policy_output and value_output are of continuous probability nature,
# we need to change them to discrete number for loss_function() computation
policy_output_discrete = torch.zeros(len(_score), NUM_OF_POSSIBLE_MOVES, requires_grad=True)
if USE_CUDA:
policy_output_discrete = policy_output_discrete.cuda()
for topk_index in range(len(_score)): # functionally equivalent to softmax()
policy_output_discrete[topk_index][policy_output.topk(1).indices[topk_index]] = 1
# substract 1 because score is one of these [-1, 0, 1] values
value_output_discrete = torch.topk(value_output, 1).indices - 1
# Loss at each iteration by comparing to target(moves)
loss1 = loss_function(policy_output_discrete, move)
# Loss at each iteration by comparing to target(score)
loss2 = loss_function(value_output_discrete, _score)
loss = loss1 + loss2
# Backpropagating gradient of loss
optimizer.zero_grad()
loss.backward()