How many hidden layers for A3C with 8 inputs ranging from -2000 to 2000 and 3 discrete actions?


I am trying to build an A3C model with 8 inputs, which are normalized and then discretized to integers between -2000 and 2000. I also need information from past steps, so I am using an LSTM after the first layer.

However, I have not had any major success in training the model, even after 1 lakh (100,000) episodes.
Could I get some help on what the ideal number of hidden layers and the ideal hidden-layer size should be for such a problem?

class ActorCritic(torch.nn.Module):
    """Recurrent actor-critic network.

    The observation goes through an LSTM, then two tanh-activated linear
    layers, and finally two separate linear heads: a scalar state-value
    (critic) head and an action-score (actor) head.
    """

    def __init__(self, params):
        """Create the LSTM, the hidden layers and the two output heads.

        Args:
            params: Object exposing the attributes ``num_inputs``,
                ``action_dim``, ``hidden_size`` and ``lstm_layers``.
        """
        super(ActorCritic, self).__init__()

        self.num_inputs = params.num_inputs
        self.action_space = params.action_dim
        # NOTE(review): hidden_size is stored but not used below -- every
        # layer width is hard-coded to 8. Confirm whether that is intended.
        self.hidden_size = params.hidden_size

        self.lstm = nn.LSTM(self.num_inputs, 8, num_layers=params.lstm_layers)
        self.fc1 = nn.Linear(8, 8)
        self.fc2 = nn.Linear(8, 8)
        # Extra hidden layers (fc3 / fc4) were tried and are currently disabled.
        self.critic_linear = nn.Linear(8, 1)
        self.actor_linear = nn.Linear(8, self.action_space)

    def forward(self, inputs):
        """Run one step of the network.

        Args:
            inputs: Tuple ``(observation, (hx, cx))``. The observation is
                flattened into a single time step of a single sequence;
                ``hx`` and ``cx`` are the LSTM hidden and cell states carried
                over from the previous step.

        Returns:
            Tuple ``(value, action_scores, (hx, cx))``: the critic output,
            the actor output (no softmax is applied here) and the updated
            LSTM state.
        """
        inputs, (hx, cx) = inputs
        # nn.LSTM (batch_first=False) expects (seq_len, batch, features).
        inputs = inputs.reshape(1, 1, -1)
        output, (hx, cx) = self.lstm(inputs, (hx, cx))
        x = torch.tanh(self.fc1(output))
        x = torch.tanh(self.fc2(x))
        return self.critic_linear(x), self.actor_linear(x), (hx, cx)

    def save(self, filename, directory):
        """Write the model's state dict to ``<directory>/<filename>_actor.pth``."""
        torch.save(self.state_dict(), '%s/%s_actor.pth' % (directory, filename))

    def load(self, filename, directory):
        """Load the state dict from ``<directory>/<filename>_actor.pth``."""
        self.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))

I have around 2 million rows of data, and one episode takes around 10 rows on average. The input is raw data, so no convolutions are needed.

Any expert advice on this would be of great help.