PyTorch autograd: peeking inside

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(3, 1)
        self.a_head = nn.Linear(1, 2)
        self.v_head = nn.Linear(1, 1)
        # every weight is set to 1 and every bias to 0 so the numbers are easy to check by hand
        nn.init.uniform_(self.fc.weight, 1, 1)
        nn.init.uniform_(self.fc.bias, 0, 0)
        nn.init.uniform_(self.a_head.weight, 1, 1)
        nn.init.uniform_(self.a_head.bias, 0, 0)
        nn.init.uniform_(self.v_head.weight, 1, 1)
        nn.init.uniform_(self.v_head.bias, 0, 0)

    def forward(self, x):
        x = torch.tanh(self.fc(x))  # F.tanh is deprecated; torch.tanh is equivalent
        a = self.a_head(x) - self.a_head(x).mean(1, keepdim=True)
        v = self.v_head(x)
        action_scores = a + v
        return action_scores

class Model():

    max_grad_norm = 0.5
    def __init__(self):

        self.eval_net, self.target_net = Net().float(), Net().float()
        self.optimizer = optim.Adam(self.eval_net.parameters(), lr=1e-3)
    
    def update(self):

        x1 = torch.tensor([[1,1,1]], dtype=torch.float)
        y1 = torch.tensor([[1]], dtype=torch.long).view(-1, 1)
        x2 = torch.tensor([[1,1,1]], dtype=torch.float)
                
        with torch.no_grad():
            y2 = self.eval_net(x2).max(1, keepdim=True)[1]
            q_target = self.target_net(x2).gather(1, y2)

        q_eval = self.eval_net(x1).gather(1, y1)
        print("__________________x1","_________",y1,"_")
        print("_fc_w",self.eval_net.fc.weight,"fc_b",self.eval_net.fc.bias,"_fc_wgrads",self.eval_net.fc.weight.grad,"_fc_bgrads",self.eval_net.fc.bias.grad)
        print("_a_w",self.eval_net.a_head.weight,"a_b",self.eval_net.a_head.bias,"_a_wgrads",self.eval_net.a_head.weight.grad,"_a_bgrads",self.eval_net.a_head.bias.grad)
        print("_v_w",self.eval_net.v_head.weight,"v_b",self.eval_net.v_head.bias,"_v_wgrads",self.eval_net.v_head.weight.grad,"_v_bgrads",self.eval_net.v_head.bias.grad)

        print("loss",q_eval-q_target)

        self.optimizer.zero_grad() 
        loss = F.smooth_l1_loss(q_eval, 2*q_target)

        loss.backward() # that's where the derivatives are calculated    

        print("loss__",loss)

        #nn.utils.clip_grad_norm_(self.eval_net.parameters(), self.max_grad_norm)
        #self.optimizer.step()

        print("_fc_w",self.eval_net.fc.weight,"fc_b",self.eval_net.fc.bias,"_fc_wgrads",self.eval_net.fc.weight.grad,"_fc_bgrads",self.eval_net.fc.bias.grad)
        print("_a_w",self.eval_net.a_head.weight,"a_b",self.eval_net.a_head.bias,"_a_wgrads",self.eval_net.a_head.weight.grad,"_a_bgrads",self.eval_net.a_head.bias.grad)
        print("_v_w",self.eval_net.v_head.weight,"v_b",self.eval_net.v_head.bias,"_v_wgrads",self.eval_net.v_head.weight.grad,"_v_bgrads",self.eval_net.v_head.bias.grad)

def main():
    model = Model()    
    model.update()
                 
if __name__ == '__main__':
    main()

Well, I wrote a little model that has two networks, eval_net and target_net, with biases 0 and weights 1, and data x1, y1, and x2 all filled with 1, just so I could recalculate the gradients by hand with my calculator and check them against PyTorch's results. But I always get it wrong, so how did PyTorch compute these grads for this specific model?

this is the output

_fc_wgrads: tensor([[-0.0098, -0.0098, -0.0098]])
_fc_bgrads: tensor([-0.0098])

_a_wgrads: tensor([[ 0.4951], [-0.4951]])
_a_bgrads: tensor([ 0.4975, -0.4975])

_v_wgrads: tensor([[-0.9901]])
_v_bgrads: tensor([-0.9951])

I calculated them manually but got the wrong gradient values, so what is the secret formula for this specific case?
Here is what I did for a_w_grads:

print("mycalculation_______", p*(1-math.tanh(3)**2)*2*(p-loss))

with this function:

def getBack(var_grad_fn):
    print(var_grad_fn)
    for n in var_grad_fn.next_functions:
        if n[0]:
            try:
                tensor = getattr(n[0], 'variable')
                print(n[0])
                print('Tensor with grad found:', tensor)
                print(' - gradient:', tensor.grad)
                print()
            except AttributeError as e:
                getBack(n[0])

I got:
<SmoothL1LossBackward object at 0x00000085E8839280>
<GatherBackward object at 0x00000085E88393D0>
<AddBackward0 object at 0x00000085E8839400>
<SubBackward0 object at 0x00000085E8839370>
<AddmmBackward object at 0x00000085E8839490>
<AccumulateGrad object at 0x00000085E8839550>
Tensor with grad found: Parameter containing:
tensor([0., 0.], requires_grad=True)
 - gradient: tensor([ 0.4975, -0.4975])

<TanhBackward object at 0x00000085E8839580>
<AddmmBackward object at 0x00000085E8839670>
<AccumulateGrad object at 0x00000085E8839700>
Tensor with grad found: Parameter containing:
tensor([0.], requires_grad=True)
 - gradient: tensor([-0.0098])

<TBackward object at 0x00000085E88396D0>
<AccumulateGrad object at 0x00000085E8839730>
Tensor with grad found: Parameter containing:
tensor([[1., 1., 1.]], requires_grad=True)
 - gradient: tensor([[-0.0098, -0.0098, -0.0098]])

<TBackward object at 0x00000085E88395B0>
<AccumulateGrad object at 0x00000085E8839670>
Tensor with grad found: Parameter containing:
tensor([[1.],
        [1.]], requires_grad=True)
 - gradient: tensor([[ 0.4951],
                     [-0.4951]])

<MeanBackward1 object at 0x00000085E88394C0>
<AddmmBackward object at 0x00000085E88395B0>
<AccumulateGrad object at 0x00000085E8839550>
Tensor with grad found: Parameter containing:
tensor([0., 0.], requires_grad=True)
 - gradient: tensor([ 0.4975, -0.4975])

<TanhBackward object at 0x00000085E8839580>
<AddmmBackward object at 0x00000085E88396D0>
<AccumulateGrad object at 0x00000085E8839700>
Tensor with grad found: Parameter containing:
tensor([0.], requires_grad=True)
 - gradient: tensor([-0.0098])

<TBackward object at 0x00000085E8839730>
<AccumulateGrad object at 0x00000085E8839790>
Tensor with grad found: Parameter containing:
tensor([[1., 1., 1.]], requires_grad=True)
 - gradient: tensor([[-0.0098, -0.0098, -0.0098]])

<TBackward object at 0x00000085E8839670>
<AccumulateGrad object at 0x00000085E88396D0>
Tensor with grad found: Parameter containing:
tensor([[1.],
        [1.]], requires_grad=True)
 - gradient: tensor([[ 0.4951],
                     [-0.4951]])

<AddmmBackward object at 0x00000085E8839460>
<AccumulateGrad object at 0x00000085E88394C0>
Tensor with grad found: Parameter containing:
tensor([0.], requires_grad=True)
 - gradient: tensor([-0.9951])

<TanhBackward object at 0x00000085E88394F0>
<AddmmBackward object at 0x00000085E88395E0>
<AccumulateGrad object at 0x00000085E8839550>
Tensor with grad found: Parameter containing:
tensor([0.], requires_grad=True)
 - gradient: tensor([-0.0098])

<TBackward object at 0x00000085E88396A0>
<AccumulateGrad object at 0x00000085E88396D0>
Tensor with grad found: Parameter containing:
tensor([[1., 1., 1.]], requires_grad=True)
 - gradient: tensor([[-0.0098, -0.0098, -0.0098]])

<TBackward object at 0x00000085E8839490>
<AccumulateGrad object at 0x00000085E88395E0>
Tensor with grad found: Parameter containing:
tensor([[1.]], requires_grad=True)
 - gradient: tensor([[-0.9901]])
That doesn't help a lot.

Hi,

The grads are computed for each elementary function, from the end to the start.
You might want to check with a simple model if you want to write the formula by hand.

Also, I'm not sure the formula for the gradient of the smooth L1 loss is correct in the line you shared.

If you want to print the graph of all the backward functions, I would recommend the torchviz package, which will make it much easier to visualize.
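For instance, a gradient check on a one-weight model could look like this (just a sketch, with arbitrary constants, so you can redo it on paper):

import torch

# one Linear(1, 1) followed by tanh, with the weight and bias fixed
lin = torch.nn.Linear(1, 1)
torch.nn.init.constant_(lin.weight, 2.0)
torch.nn.init.constant_(lin.bias, 0.0)

x = torch.tensor([[1.0]])
y = torch.tanh(lin(x))            # y = tanh(w*x + b)
loss = 0.5 * (y ** 2).sum()
loss.backward()

# hand-derived gradient via the chain rule: dloss/dw = y * (1 - tanh(w*x + b)**2) * x
t = torch.tanh(torch.tensor(2.0))
print(lin.weight.grad, t * (1 - t ** 2) * 1.0)   # the two values should match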

inp = torch.tensor([[1,1,1]], dtype=torch.float)
i = torch.tensor([[1]], dtype=torch.long).view(-1, 1)

outp = self.eval_net(inp).gather(1, i)

self.optimizer.zero_grad()
loss = F.smooth_l1_loss(outp, 0 * outp)
loss.backward()

So how does the backward work if eval_net has multiple outputs?

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(3, 1)
        self.a_head = nn.Linear(1, 2)  # in this case, two outputs

How can a loss of dimension 1 work with an output of n dimensions during backward?

torchviz doesn’t show the math behind the derivatives

But hopefully the math for these simple functions should be easy to derive.
At least it gives you the structure of the calculations.
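To make that concrete for your exact setup (all weights 1, biases 0, input [1, 1, 1]), a sketch that redoes the chain rule with plain math and reproduces the numbers you printed:

import math

t = math.tanh(3)   # fc output after tanh: weights 1, bias 0, input [1, 1, 1]
# forward: a = [t, t] - mean = [0, 0], v = t, q = gather index 1 -> t
# loss = smooth_l1(q, 2*t); since |q - 2*t| < 1, dloss/dq = q - 2*t = -t
dq = -t

# a_head: gather picks output 1; the mean subtracts 1/2 from each column
da = [dq * (-0.5), dq * (1 - 0.5)]     # [ 0.4975, -0.4975]  -> a_head.bias.grad
dw_a = [g * t for g in da]             # [ 0.4951, -0.4951]  -> a_head.weight.grad

# v_head: q depends on v with coefficient 1
dv = dq                                # -0.9951             -> v_head.bias.grad
dw_v = dv * t                          # -0.9901             -> v_head.weight.grad

# fc: the two a_head contributions cancel, only the v_head path flows back
dx = dv * 1.0
dz = dx * (1 - t ** 2)                 # tanh backward (chain rule)
db_fc = dz                             # -0.0098             -> fc.bias.grad
dw_fc = [dz * 1.0] * 3                 # [-0.0098] * 3       -> fc.weight.grad

print(da, dw_a, dv, dw_v, db_fc, dw_fc)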

So how does the backward work if eval_net has multiple outputs?

What do you mean? In your code example above, it returns a single Tensor that you call gather on, right?
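In other words, gather picks a single entry per row, so during backward only the selected output receives a nonzero gradient. A quick illustration (just a sketch):

import torch

out = torch.ones(1, 5, requires_grad=True)   # pretend network output with 5 actions
idx = torch.tensor([[1]])                    # action index chosen for this sample

picked = out.gather(1, idx)                  # shape (1, 1): a single value
loss = picked.sum()
loss.backward()

print(out.grad)                              # tensor([[0., 1., 0., 0., 0.]])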

PS: You can put your code examples in between triple ticks ``` to make them look better (check how I edited your last message).

I'm going to be clear :slight_smile: I hate PyTorch and every RL framework, they are parasites. That's why I wrote my own DQN to solve Pendulum-v0: I copied every piece of code from it to numpy, but it's still not working. Can someone debug the code? It's the best I could come up with.

Well they do help quite a lot to avoid having such issues.
You don’t have to spend days computing gradients by hand and even more debugging them.

From looking at your code, I'm not sure how you define the gradient of the loss function, and given that your optimizer is already flipping the sign of the gradient, you should not do it there as well.
Also, your tanh formula is wrong, as it's missing the chain rule that multiplies it by the incoming gradient.
There might be other issues though…
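Concretely, for a tanh layer the backward pass multiplies the incoming gradient by the local derivative (a numpy sketch; the names are illustrative):

import numpy as np

def tanh_backward(grad_out, z):
    """Gradient of the loss w.r.t. the input of tanh, given the gradient
    w.r.t. its output and the pre-activation z."""
    return grad_out * (1.0 - np.tanh(z) ** 2)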

your optimizer is already flipping the sign of the gradient: where? what line?
The tanh formula is wrong? I don't see that …

Well, can you just fix the code and show me what you've got? I really need that code. That's also why I hate using frameworks, because they solve things for you without you understanding a nibble of what they do, and they're also heavier. Thank you so much anyway, I've been struggling for about a month right now.

where? what line?

https://github.com/Isawyou/dqn-numpy-only-not-working/blob/c91a1b788523497f52ca6497c604851e76a97c3a/dqn%20numpy%20only/actor_net.py#L104

I don't see that …

The chain rule says you should multiply the incoming gradient by the derivative of the current function.
If I understand correctly that this line tries to compute the gradient w.r.t. the output of the first linear layer, then you're definitely missing a grad_a or out1 somewhere in that formula.
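For reference, the backward pass for a small fc -> relu -> head network has this structure (a numpy sketch, assuming the forward pass computes h = relu(x @ W1 + b1) and q = h @ W2 + b2, with grad_q being the gradient of the loss w.r.t. q; the names are illustrative):

import numpy as np

def backward(x, z1, h, grad_q, W2):
    """Backprop through q = h @ W2 + b2 with h = relu(z1), z1 = x @ W1 + b1."""
    grad_W2 = h.T @ grad_q            # (hidden, actions)
    grad_b2 = grad_q.sum(axis=0)      # (actions,)
    grad_h  = grad_q @ W2.T           # chain rule: push the gradient back
    grad_z1 = grad_h * (z1 > 0)       # relu derivative
    grad_W1 = x.T @ grad_z1           # (inputs, hidden)
    grad_b1 = grad_z1.sum(axis=0)     # (hidden,)
    return grad_W1, grad_b1, grad_W2, grad_b2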

well can you just fix the code

As I said above, writing this kind of code by hand would take way too much time as there are so many places where you can make subtle mistakes. I’m afraid I don’t have the time to do that for you.

I hate using frameworks, because they solve things for you without you understanding a nibble of what they do

Yes, but depending on what you want to do, you don’t need to understand all the details.
And even if you do understand all the details, it does not mean that you can re-implement the whole thing, bug-free, easily.

Yeah, just like everybody: we all depend on PyTorch to do the work for us, and nobody can replicate what it does. I'm trying to break this idea, aren't you?

You can replicate it, but you will need to use the right concepts.
Writing derivatives by hand works fine for linear or trivial problems.
If you start having networks, you might need to have the backprop done automatically using something that looks like AD. Otherwise, as you can see in your repo, the code becomes hard to read (and buggy) very quickly.
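To illustrate what "something that looks like AD" means, here is a toy scalar autodiff sketch (purely illustrative, nothing like PyTorch's actual implementation): each operation records how to push gradients to its inputs, and backward() replays those closures in reverse order.

import math

class Value:
    """Tiny scalar autodiff node: just enough to see how backprop can be automated."""
    def __init__(self, data, parents=(), backward_fn=lambda: None):
        self.data = data
        self.grad = 0.0
        self._parents = parents
        self._backward = backward_fn

    def __mul__(self, other):
        out = Value(self.data * other.data, (self, other))
        def _backward():
            self.grad += other.data * out.grad   # chain rule
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        t = math.tanh(self.data)
        out = Value(t, (self,))
        def _backward():
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward
        return out

    def backward(self):
        # simple topological walk from the output back to the leaves
        topo, seen = [], set()
        def build(v):
            if v not in seen:
                seen.add(v)
                for p in v._parents:
                    build(p)
                topo.append(v)
        build(self)
        self.grad = 1.0
        for v in reversed(topo):
            v._backward()

w, x = Value(2.0), Value(1.0)
y = (w * x).tanh()
y.backward()
print(w.grad)   # (1 - tanh(2)**2) * 1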

I prefer to spend time making pytorch do more stuff instead of re-implementing it. But that’s a personal choice :slight_smile:

and that’s why I’m lonely!

You were right about tanh, I made an error :slight_smile:

import argparse
import pickle
from collections import namedtuple

import matplotlib.pyplot as plt
import numpy as np

import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

parser = argparse.ArgumentParser(description='Solve the Pendulum-v0 with DQN')
parser.add_argument(
    '--gamma', type=float, default=0.9, metavar='G', help='discount factor (default: 0.9)')
parser.add_argument(
    '--num_actions', type=int, default=5, metavar='N', help='discretize action space (default: 5)')
parser.add_argument('--seed', type=int, default=0, metavar='N', help='random seed (default: 0)')
parser.add_argument('--render', action='store_true', help='render the environment')
parser.add_argument(
    '--log-interval',
    type=int,
    default=10,
    metavar='N',
    help='interval between training status logs (default: 10)')
args = parser.parse_args()

torch.manual_seed(args.seed)
np.random.seed(args.seed)

TrainingRecord = namedtuple('TrainingRecord', ['ep', 'reward'])
Transition = namedtuple('Transition', ['s', 'a', 'r', 's_'])

class Opt():

    def __init__(self):
        self.params = {}
        self.params['W1'] = self._uniform_init(3, 100)
        self.params['b1'] = np.zeros(100)
        self.params['W2'] = self._uniform_init(100, 5)
        self.params['b2'] = np.zeros(5)

        self.grads = {}
        self.grads['W1'] = self._uniform_init(3, 100)
        self.grads['b1'] = np.zeros(100)
        self.grads['W2'] = self._uniform_init(100, 5)
        self.grads['b2'] = np.zeros(5)

        self.optm_cfg = {}
        self.optm_cfg['W1'] = None
        self.optm_cfg['b1'] = None
        self.optm_cfg['W2'] = None
        self.optm_cfg['b2'] = None

    def train(self):
        self.params['W2'] = self._adam(self.params['W2'], self.grads['W2'], config=self.optm_cfg['W2'])[0]
        self.params['W1'] = self._adam(self.params['W1'], self.grads['W1'], config=self.optm_cfg['W1'])[0]
        self.params['b2'] = self._adam(self.params['b2'], self.grads['b2'], config=self.optm_cfg['b2'])[0]
        self.params['b1'] = self._adam(self.params['b1'], self.grads['b1'], config=self.optm_cfg['b1'])[0]
        # print("_a_b", self.params['b2'])
        # print("_fc_b", self.params['b1'])

        # Update the configuration parameters to be used in the next iteration
        self.optm_cfg['W2'] = self._adam(self.params['W2'], self.grads['W2'], config=self.optm_cfg['W2'])[1]
        self.optm_cfg['W1'] = self._adam(self.params['W1'], self.grads['W1'], config=self.optm_cfg['W1'])[1]
        self.optm_cfg['b2'] = self._adam(self.params['b2'], self.grads['b2'], config=self.optm_cfg['b2'])[1]
        self.optm_cfg['b1'] = self._adam(self.params['b1'], self.grads['b1'], config=self.optm_cfg['b1'])[1]

    def _adam(self, x, dx, config=None):
        if config is None:
            config = {}
        config.setdefault('learning_rate', 1e-3)
        config.setdefault('beta1', 0.9)
        config.setdefault('beta2', 0.999)
        config.setdefault('epsilon', 1e-8)
        config.setdefault('m', np.zeros_like(x))
        config.setdefault('v', np.zeros_like(x))
        config.setdefault('t', 0)

        next_x = None

        # Adam update formula
        config['t'] += 1
        config['m'] = config['beta1']*config['m'] + (1-config['beta1'])*dx
        config['v'] = config['beta2']*config['v'] + (1-config['beta2'])*(dx**2)
        bias_corr1 = (1 - config['beta1']**config['t'])
        bias_corr2 = (1 - config['beta2']**config['t'])

        denom = (np.sqrt(config['v'])) / (np.sqrt(bias_corr2) + config['epsilon'])
        step_size = config['learning_rate'] / bias_corr1

        next_x = x + (-step_size)*(config['m']/denom)
        if len(next_x.shape) > 1:
            import math
            for i in range(len(next_x[0])):
                if math.isnan(next_x[0, i]):
                    next_x[0, i] = x[0, i]
        else:
            import math
            for i in range(len(next_x)):
                if math.isnan(next_x[i]):
                    next_x[i] = x[i]
        return next_x, config

    def _uniform_init(self, input_size, output_size):
        u = np.sqrt(6./(input_size+output_size))
        return np.random.uniform(-u, u, (input_size, output_size))

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(3, 100)
        self.a_head = nn.Linear(100, args.num_actions)

    def forward(self, x):
        x = F.relu(self.fc(x))
        a = self.a_head(x)
        action_scores = a
        return action_scores

class Memory():

    data_pointer = 0
    isfull = False

    def __init__(self, capacity):
        self.memory = np.empty(capacity, dtype=object)
        self.capacity = capacity

    def update(self, transition):
        self.memory[self.data_pointer] = transition
        self.data_pointer += 1
        if self.data_pointer == self.capacity:
            self.data_pointer = 0
            self.isfull = True

    def sample(self, batch_size):
        return np.random.choice(self.memory, batch_size)

class Agent():

    action_list = [(-2,), (2,), (3,), (-3,), (4,)]
    max_grad_norm = 0.5

    def __init__(self):
        self.training_step = 0
        self.epsilon = 1
        self.eval_net, self.target_net = Net().float(), Net().float()
        self.memory = Memory(2000)
        self.optimizer = optim.Adam(self.eval_net.parameters(), lr=1e-3)
        self.opt = Opt()

    def select_action(self, state):
        state = torch.from_numpy(state).float().unsqueeze(0)
        if np.random.random() < self.epsilon:
            action_index = np.random.randint(args.num_actions)
        else:
            probs = self.eval_net(state)
            action_index = probs.max(1)[1].item()
        return self.action_list[action_index], action_index

    def save_param(self):
        torch.save(self.eval_net.state_dict(), 'param/dqn_net_params.pkl')

    def store_transition(self, transition):
        self.memory.update(transition)

    def update(self):
        self.training_step += 1

        transitions = self.memory.sample(32)
        s = torch.tensor([t.s for t in transitions], dtype=torch.float)
        a = torch.tensor([t.a for t in transitions], dtype=torch.long).view(-1, 1)
        r = torch.tensor([t.r for t in transitions], dtype=torch.float).view(-1, 1)
        s_ = torch.tensor([t.s_ for t in transitions], dtype=torch.float)

        # natural dqn
        # q_eval = self.eval_net(s).gather(1, a)
        # with torch.no_grad():
        #     q_target = r + args.gamma * self.target_net(s_).max(1, keepdim=True)[0]

        # double dqn
        with torch.no_grad():
            a_ = self.eval_net(s_).max(1, keepdim=True)[1]
            q_target = r + args.gamma * self.target_net(s_).gather(1, a_)
        q_eval = self.eval_net(s).gather(1, a)

        self.optimizer.zero_grad()
        loss = F.smooth_l1_loss(q_eval, q_target)
        loss.backward()
        # nn.utils.clip_grad_norm_(self.eval_net.parameters(), self.max_grad_norm)
        # self.optimizer.step()

        with torch.no_grad():
            # copy PyTorch's gradients into the hand-written optimizer (note the transpose)
            for i in range(100):
                for j in range(5):
                    self.opt.grads['b2'][j] = self.eval_net.a_head.bias.grad[j]
                    self.opt.grads['W2'][i, j] = self.eval_net.a_head.weight.grad[j, i]
            for i in range(3):
                for j in range(100):
                    self.opt.grads['b1'][j] = self.eval_net.fc.bias.grad[j]
                    self.opt.grads['W1'][i, j] = self.eval_net.fc.weight.grad[j, i]

            self.opt.train()

            # copy the updated parameters back into the network
            for i in range(100):
                for j in range(5):
                    self.eval_net.a_head.bias[j] = self.opt.params['b2'][j]
                    self.eval_net.a_head.weight[j, i] = self.opt.params['W2'][i, j]
            for i in range(3):
                for j in range(100):
                    self.eval_net.fc.bias[j] = self.opt.params['b1'][j]
                    self.eval_net.fc.weight[j, i] = self.opt.params['W1'][i, j]

        if self.training_step % 200 == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())

        self.epsilon = max(self.epsilon * 0.999, 0.01)

        return q_eval.mean().item()

def main():
    env = gym.make('Pendulum-v0')
    env.seed(args.seed)

    agent = Agent()

    training_records = []
    running_reward, running_q = -1000, 0
    state = env.reset()
    for i_ep in range(100):
        score = 0

        for t in range(200000000000):
            action, action_index = agent.select_action(state)
            state_, reward, done, _ = env.step(action)
            score += reward
            if True:
                env.render()
            agent.store_transition(Transition(state, action_index, (reward + 8) / 8, state_))
            state = state_
            if agent.memory.isfull:
                q = agent.update()
                running_q = 0.99 * running_q + 0.01 * q

        running_reward = running_reward * 0.9 + score * 0.1
        training_records.append(TrainingRecord(i_ep, running_reward))

        if i_ep % args.log_interval == 0:
            print('Ep {}\tAverage score: {:.2f}\tAverage Q: {:.2f}'.format(
                i_ep, running_reward, running_q))
        if running_reward > -200:
            print("Solved! Running reward is now {}!".format(running_reward))
            env.close()
            agent.save_param()
            with open('log/dqn_training_records.pkl', 'wb') as f:
                pickle.dump(training_records, f)
            break

    env.close()

    plt.plot([r.ep for r in training_records], [r.reward for r in training_records])
    plt.title('DQN')
    plt.xlabel('Episode')
    plt.ylabel('Moving averaged episode reward')
    plt.savefig("img/dqn.png")
    plt.show()


if __name__ == '__main__':
    main()

I've implemented Adam by hand, but it's not working. Why? If I uncomment optimizer.step(), it works.
I copied this: https://pytorch.org/docs/stable/_modules/torch/optim/adam.html#Adam.step
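For comparison, the update in the linked implementation computes the step roughly like this (a sketch of the standard Adam step in numpy; note where epsilon enters relative to the division):

import numpy as np

def adam_step(x, dx, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    """One Adam update, following the structure of the linked PyTorch code."""
    t += 1
    m = beta1 * m + (1 - beta1) * dx
    v = beta2 * v + (1 - beta2) * dx**2
    bias_corr1 = 1 - beta1**t
    bias_corr2 = 1 - beta2**t
    denom = np.sqrt(v) / np.sqrt(bias_corr2) + eps   # eps is added after the division
    step_size = lr / bias_corr1
    x = x - step_size * m / denom
    return x, m, v, t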

Well, it sounds like doing RMSprop by hand works like a charm :slight_smile: I conclude that Adam has floating-point instability. I think I solved it! Whoever wants the source code, contact me! Cheers.