Hi!
I’m implementing PPO with a recurrent actor and a recurrent critic (both the actor and the critic contain an LSTM layer). The problem is that, for both LSTM layers, the gradient of weight_hh_l[k] is always zero, whereas the gradient of weight_ih_l[k] isn’t.
Can someone help?
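To make it concrete, this is the kind of check I run after the backward pass. The toy LSTM below is only there to make the snippet self-contained; in my actual actor and critic I loop over named_parameters() the same way, and the weight_hh_l0 gradient always prints as zero while weight_ih_l0 doesn’t.

```python
import torch
import torch.nn as nn

# Toy LSTM just to illustrate the gradient check (not my actual network)
lstm = nn.LSTM(input_size=8, hidden_size=4, num_layers=1, batch_first=True)
out, _ = lstm(torch.rand(3, 5, 8))   # (batch, seq_len, features)
out.sum().backward()                 # stand-in for the PPO loss
for name, param in lstm.named_parameters():
    # prints weight_ih_l0, weight_hh_l0, bias_ih_l0, bias_hh_l0 and their gradient magnitudes
    print(name, param.grad.abs().sum().item())
```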
Here’s the code for the critic; the actor is very similar.
Important notes: please don’t mind that I don’t normalize all the elements of x in normalize(x); this is simply because, in my environment, the last two elements of the state are already between 0 and 1. Also, the critic receives both the action and the state as input to the forward function.
```python
import torch
import torch.nn as nn
from torch.autograd import Variable


class CriticNetwork(nn.Module):
    def __init__(self, nb_time_steps, len_action, len_state, num_layers=1,
                 dropout=0, orthogonal_init=False, cuda=False):
        super(CriticNetwork, self).__init__()
        self.len_action = len_action
        self.len_state = len_state
        self.num_layers = num_layers
        self.hidden_size = 1
        self.cuda = cuda
        self.nb_time_steps = nb_time_steps
        rnn_layer = nn.LSTM(input_size=len_state + self.len_action, hidden_size=self.hidden_size,
                            num_layers=num_layers, batch_first=True, dropout=dropout,
                            device='cuda' if self.cuda else 'cpu')
        output_layer = nn.Linear(self.hidden_size, self.hidden_size,
                                 device='cuda' if self.cuda else 'cpu')
        if orthogonal_init:
            orthogonal_init_func([rnn_layer, output_layer])
        self.layers = nn.ModuleDict({'rnn_layer': rnn_layer,
                                     'output_layer': output_layer,
                                     'normalization_layer': nn.LayerNorm(len_state - 2, eps=1e-05,
                                                                         elementwise_affine=True)})
        self.activations = nn.ModuleDict({'relu_0': nn.LeakyReLU(), 'tanh': nn.Tanh()})
        if cuda:
            self.to(torch.device('cuda'))

    def forward(self, x):  # x = action, state
        normalized_x = self.normalize(x)
        h_0 = Variable(torch.zeros(self.num_layers, normalized_x.size(0), self.hidden_size,
                                   device='cuda' if self.cuda else 'cpu'))  # hidden state
        c_0 = Variable(torch.zeros(self.num_layers, normalized_x.size(0), self.hidden_size,
                                   device='cuda' if self.cuda else 'cpu'))  # cell state
        # Propagate the input through the LSTM with the given hidden and cell states
        output, (hn, cn) = self.layers['rnn_layer'](normalized_x, (h_0, c_0))
        hn_ = hn[-1].view(-1, self.hidden_size)  # reshape the last hidden state for the dense layer
        output_rnn = self.activations['relu_0'](self.layers['output_layer'](hn_))
        return self.activations['tanh'](torch.reshape(output_rnn, (self.nb_time_steps, 1)))

    def normalize(self, x):  # x = action, state
        normalized_x = []
        for x_elem in x:
            normalized_state = torch.cat((x_elem[0:self.len_action],
                                          self.layers['normalization_layer'](x_elem[self.len_action:len(x_elem) - 2]),
                                          x_elem[len(x_elem) - 2:]))
            normalized_x.append(normalized_state)
        normalized_x = torch.stack(normalized_x)
        return torch.reshape(normalized_x, (normalized_x.shape[0], 1, normalized_x.shape[1]))
```
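For reference, here is a minimal way to run the critic on dummy data and look at the gradients. The dimensions are made-up placeholders (not my real environment); each input row is the action concatenated with the state, and the dummy loss is only there to trigger a backward pass.

```python
# Hypothetical dimensions, for illustration only
nb_time_steps, len_action, len_state = 8, 2, 6
critic = CriticNetwork(nb_time_steps, len_action, len_state)

# Each row is [action, state]; torch.rand keeps the last two state entries in [0, 1]
x = torch.rand(nb_time_steps, len_action + len_state)
values = critic(x)        # shape: (nb_time_steps, 1)

values.sum().backward()   # dummy loss, just to populate the gradients
for name, param in critic.named_parameters():
    print(name, param.grad.abs().sum().item())  # weight_hh_l0's gradient prints as zero, weight_ih_l0's doesn't
```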
The orthogonal initialization function I wrote is:
```python
def orthogonal_init_func(layers):
    for layer in layers:
        if type(layer) in [nn.GRU, nn.LSTM, nn.RNN]:
            # Recurrent layers: Xavier for input-to-hidden, orthogonal for hidden-to-hidden, zeros for biases
            for name, param in layer.named_parameters():
                if 'weight_ih' in name:
                    torch.nn.init.xavier_uniform_(param.data)
                elif 'weight_hh' in name:
                    torch.nn.init.orthogonal_(param.data)
                elif 'bias' in name:
                    param.data.fill_(0)
        else:
            nn.init.orthogonal_(layer.weight)
```
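And just to rule out the initialization itself, it can be sanity-checked on a throwaway LSTM like this (sizes are arbitrary, not my actual layers):

```python
# Throwaway LSTM just to check the init (arbitrary sizes)
lstm = nn.LSTM(input_size=8, hidden_size=4, num_layers=1, batch_first=True)
orthogonal_init_func([lstm])

w_hh = lstm.weight_hh_l0.detach()  # shape: (4 * hidden_size, hidden_size)
print(torch.allclose(w_hh.T @ w_hh, torch.eye(w_hh.shape[1]), atol=1e-5))  # True: columns are orthonormal
print(lstm.bias_hh_l0.abs().sum().item())                                  # 0.0: biases are zeroed
```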