Hi,
I am trying to extract features from time-series data with a window size of 50. However, after some A3C training, the outputs of nn.Conv1d turn into NaNs.
Can someone please help me figure out what I am missing here?
Below is my code for the actor-critic model:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(nn.Module):
    def __init__(self, params):
        super(ActorCritic, self).__init__()
        self.window = params.window
        self.state_dim = params.state_dim
        self.action_space = params.action_dim
        self.hidden_size = params.hidden_size
        # two Conv1d layers over the raw window (1 input channel)
        self.conv1 = nn.Conv1d(1, params.out_channels, params.kernel_size1,
                               stride=params.kernel_size1 - 1, padding=params.kernel_size1 - 2)
        self.conv2 = nn.Conv1d(params.out_channels, params.out_channels, params.kernel_size2,
                               stride=params.kernel_size2 - 1, padding=params.kernel_size2 - 2)
        # flattened conv output size plus the 3 extra state features
        self.lstm_size = self.calc_inputs(params.window)
        lstm_size = self.lstm_size
        params.lstm_size = lstm_size
        self.lstm = nn.LSTMCell(lstm_size, lstm_size)
        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)
        lst = [lstm_size]
        for i in range(params.layers):
            lst.append(params.hidden_size)
        self.hidden = nn.ModuleList()
        for k in range(len(lst) - 1):
            self.hidden.append(nn.Linear(lst[k], lst[k + 1]))
        for layer in self.hidden:
            layer.apply(init_weights)
        self.critic_linear = nn.Linear(lst[-1], 1)
        self.critic_linear.apply(init_weights)
        self.actor_linear = nn.Linear(lst[-1], self.action_space)
        self.actor_linear.apply(init_weights)
        self.train()

    def forward(self, inputs):
        # state[1]: the window as a (1, 1, window) tensor fed to the convs
        # state[0]: the remaining 3 state features, concatenated with the conv output
        (state), (hx, cx) = inputs
        conv1_out = self.conv1(state[1])
        conv2_out = self.conv2(conv1_out).reshape(1, -1)
        inputs = torch.cat([state[0], conv2_out], dim=1)
        # print(inputs)
        hx, cx = self.lstm(inputs, (hx, cx))
        x = hx
        for layer in self.hidden:
            x = F.elu(layer(x))
        return self.critic_linear(x), self.actor_linear(x), (hx, cx)

    def calc_inputs(self, num_inputs):
        # run a dummy window through the convs to get the flattened feature size
        a = torch.rand(num_inputs).reshape(1, 1, -1)
        a = self.conv2(self.conv1(a)).reshape(-1,)
        return a.shape[0] + 3

    def save(self, filename, directory):
        torch.save(self.state_dict(), '%s/%s_actor.pth' % (directory, filename))

    def load(self, filename, directory):
        self.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))
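(`init_weights` is just a small helper applied to the Linear layers; roughly something like the sketch below, with Xavier weights and zero biases, though the exact details may differ.)

    def init_weights(m):
        # assumed stand-in: Xavier-init weights and zero biases on Linear layers
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            nn.init.zeros_(m.bias)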
And here is the training code:
state = env.reset()
count = 0
done = True
info = False
while info != True:
    values = []
    log_probs = []
    rewards = []
    entropies = []
    # collect an n-step rollout
    for step in range(params.num_steps):
        if info == True:
            break
        if done:
            h_out = (Variable(torch.zeros([1, params.lstm_size])),
                     Variable(torch.zeros([1, params.lstm_size])))
            state = env.reset()
        else:
            h_out = (Variable(h_out[0].data), Variable(h_out[1].data))
        h_in = h_out
        value, action_values, h_out = model(((state), h_in))
        action_values = action_values.reshape(-1,)
        prob = F.softmax(action_values - max(action_values), dim=0)
        log_prob = torch.log(prob)
        entropy = -(log_prob * prob).sum()
        entropies.append(entropy)
        action = epsilon_greedy(prob, epsilon)
        # action = Categorical(prob).sample().reshape(-1,)
        log_prob_a = log_prob.gather(0, Variable(action))
        # print("action_values:", action_values)
        # print("prob:", prob)
        # print("log_prob:", log_prob)
        # print("action:", action, "log_prob_a:", log_prob_a)
        (state), reward, done, info, _ = env.step(action)
        reward = max(min(reward, 1), -1)
        count += 1
        if done:
            (state) = env.reset()
        values.append(value)
        log_probs.append(log_prob_a)
        rewards.append(reward)
        if done:
            break

    # bootstrap the return from the critic if the episode did not end
    R = torch.zeros(1, 1)
    if not done:
        value, _, _ = model(((state), h_out))
        R = value.data
    values.append(Variable(R))

    # GAE-based policy loss and value loss over the rollout
    policy_loss = 0
    value_loss = 0
    R = Variable(R)
    gae = torch.zeros(1, 1).to(device)
    for i in reversed(range(len(rewards))):
        R = params.gamma * R + rewards[i]
        advantage = R - values[i]
        value_loss = value_loss + 0.5 * advantage.pow(2)
        TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
        gae = gae * params.gamma * params.tau + TD
        policy_loss = policy_loss - log_probs[i] * gae - 0.01 * entropies[i]

    optimizer.zero_grad()
    (policy_loss + 0.5 * value_loss).mean().backward()
    nn.utils.clip_grad_norm_(model.parameters(), 40)
    ensure_shared_grads(model, shared_model)
    optimizer.step()

model.save(filename, directory="./pytorch_models")
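To narrow down where the NaNs first show up, this is roughly what I can add for debugging (a minimal sketch reusing `model` from the code above; `set_detect_anomaly` and forward hooks are standard PyTorch, `nan_hook` is just a name I made up):

    # make backward() report the op that produced a NaN gradient
    torch.autograd.set_detect_anomaly(True)

    # forward hook that flags the first conv whose output contains a NaN
    def nan_hook(module, inp, out):
        if torch.isnan(out).any():
            print("NaN in output of", module.__class__.__name__)

    model.conv1.register_forward_hook(nan_hook)
    model.conv2.register_forward_hook(nan_hook)

    # after each optimizer.step(), check whether the weights themselves are NaN
    for name, p in model.named_parameters():
        if torch.isnan(p).any():
            print("NaN in parameter:", name)

With this in place I can at least tell whether the conv weights blow up after an update or whether a bad input/gradient is coming from somewhere else.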