DQN noob question

I’m trying to implement a simple DQN, and I wonder whether I have understood correctly that it is fine to apply the loss function only to the difference between the (scalar) target and a single element of the network output (the Q-value of the action that was actually taken), something like this:

def fn_reinforce(self, batch): # (state, action, reward, next_state)
        """Compute a Q-learning loss per transition and clamp policy gradients.

        For each of the first ``self.batch_size`` entries of ``batch`` the
        loss compares the policy network's output for the action taken with
        a scalar target:

        * ``reward`` alone when ``next_state`` is ``None``;
        * ``reward + max(target_net(next_state))`` otherwise, with the
          target network evaluated under ``torch.no_grad()``.

        Gradients already stored on the policy network's parameters are then
        clamped element-wise to ``[-1, 1]``.

        Args:
            batch: Indexable collection whose items expose ``state``,
                ``action``, ``reward`` and ``next_state`` attributes.
                Assumes ``policy_net(state)`` is 2-D with the action on the
                second axis (it is indexed as ``[0, action]``) -- TODO
                confirm against the caller.

        NOTE(review): no backward pass or optimizer step appears in this
        snippet, so ``loss`` is computed but not otherwise used here; confirm
        where ``loss.backward()`` and the optimizer update are meant to run.
        NOTE(review): the bootstrap term is added without a discount factor;
        confirm that this is intended.
        """
        for i in range(self.batch_size):
            transition = batch[i]

            # Only the Q-value of the action actually taken enters the loss;
            # the other outputs of the network get no gradient from it.
            Q_predict = self.policy_net(transition.state)[0, transition.action]

            if transition.next_state is None:
                # No successor state to bootstrap from: target is the reward.
                Q_target = transition.reward
            else:
                # The target must not propagate gradients into target_net.
                with torch.no_grad():
                    next_Q = torch.max(self.target_net(transition.next_state))
                Q_target = transition.reward + next_Q

            loss = self.loss_fn(Q_predict, Q_target)

            for param in self.policy_net.parameters():
                # Parameters that have not received a gradient yet have
                # ``grad is None``; skip them instead of raising.
                if param.grad is not None:
                    param.grad.data.clamp_(-1, 1)