Hi there,

I was trying to implement an A2C model to train an agent on one of the OpenAI Gym environments.

I created two models that look identical to me in terms of structure and forward logic. The main difference between the two models is that one is built using nn.ModuleList with nn.Sequential wrappers inside, while the other defines the layers directly as attributes. However, only the direct (Sequential-style) implementation is learning.

### Model One

```
# Reproducibility: fix PyTorch's RNG so weight init and action sampling repeat.
torch.manual_seed(999)
# Architecture hyperparameters, read as module-level globals by A2C_model below.
base_h_num = 0    # extra hidden layers in the shared base (beyond the input layer)
actor_h_num = 1   # hidden layers in the actor head
critic_h_num = 1  # hidden layers in the critic head
act_size = 4      # action-vector dimensionality (continuous actions — sampled from a Normal below)
input_dim = 33    # observation/state vector size
class A2C_model(nn.Module):
    """Advantage Actor-Critic (A2C) network for continuous action spaces.

    A shared fully connected base feeds two heads:
      * actor: outputs the mean of a diagonal Gaussian policy, with a
        learnable state-independent std (``self.std``);
      * critic: outputs a scalar state-value estimate.

    Backward-compatible generalization: the hidden-layer counts, previously
    read from module-level globals (``base_h_num``/``actor_h_num``/
    ``critic_h_num``), are now keyword parameters whose defaults match the
    original globals' values (0, 1, 1), so existing callers are unaffected.

    Args:
        input_dim: size of the observation vector.
        act_size: dimensionality of the action vector.
        base_h_num: extra Linear+ReLU stages in the shared base.
        actor_h_num: Linear+ReLU stages in the actor head.
        critic_h_num: Linear+ReLU stages in the critic head.
    """

    def __init__(self, input_dim, act_size,
                 base_h_num=0, actor_h_num=1, critic_h_num=1):
        super().__init__()
        self.input_dim = input_dim
        self.act_size = act_size
        self.base_h_num = base_h_num
        self.actor_h_num = actor_h_num
        self.critic_h_num = critic_h_num
        self.base = self.create_base(self.input_dim)
        self.mu = self.create_actor()
        self.val = self.create_critic()
        # Learnable, state-independent std for the Gaussian policy.
        self.std = nn.Parameter(torch.ones(1, act_size))

    def create_base(self, input_dim):
        """Shared trunk: input projection + optional extra hidden layers."""
        module_list = nn.ModuleList()
        layer = nn.Sequential()
        layer.add_module("fc_layer_1", nn.Linear(input_dim, 128))
        layer.add_module("RELU_layer_1", nn.ReLU())
        module_list.append(layer)
        self.add_hidden_layer(module_list, self.base_h_num, 128, 128)
        return module_list

    def create_actor(self):
        """Actor head: hidden layers + linear output producing action means."""
        # FIX: removed a dead `layer = nn.Sequential()` local that was never used.
        module_list = nn.ModuleList()
        self.add_hidden_layer(module_list, self.actor_h_num, 128, 128)
        module_list.append(nn.Sequential(nn.Linear(128, self.act_size)))
        return module_list

    def create_critic(self):
        """Critic head: hidden layers + linear output producing a scalar value."""
        # FIX: removed a dead `layer = nn.Sequential()` local that was never used.
        module_list = nn.ModuleList()
        self.add_hidden_layer(module_list, self.critic_h_num, 128, 128)
        module_list.append(nn.Sequential(nn.Linear(128, 1)))
        return module_list

    def add_hidden_layer(self, module_list, num_hidden_layer,
                         input_dim, output_dim):
        """Append ``num_hidden_layer`` Linear+ReLU stages to ``module_list`` in place.

        A count of 0 appends nothing (the explicit early-return was redundant:
        ``range(1, 1)`` is already empty).
        """
        for i in range(1, num_hidden_layer + 1):
            layer = nn.Sequential()
            layer.add_module(f"fc_layer_{i}", nn.Linear(input_dim, output_dim))
            layer.add_module(f"RELU_layer_{i}", nn.ReLU())
            module_list.append(layer)

    def forward(self, x):
        """Run one step: sample an action and estimate the state value.

        Returns:
            (clamped action in [-1, 1], log-prob of the *unclamped* sample,
             critic value) — note log_prob is taken before clamping, matching
            the original behavior.
        """
        for stage in self.base:
            x = stage(x)
        mu = x
        for stage in self.mu:
            mu = stage(mu)
        dist = torch.distributions.Normal(mu, self.std)
        actions = dist.sample()
        log_prob = dist.log_prob(actions)
        # FIX (clarity): use a separate local for the critic pass instead of
        # clobbering `x`, so the base features remain inspectable.
        value = x
        for stage in self.val:
            value = stage(value)
        return torch.clamp(actions, -1, 1), log_prob, value
```

### Model 2

```
class A2C_model(nn.Module):
    """Flat A2C network: shared layer, Gaussian actor head, scalar critic head.

    FIX: ``super(ActorCriticNetwork, self).__init__()`` referenced an
    undefined class name (this class is ``A2C_model``), which raises
    ``NameError`` at construction unless a class of that name happens to
    exist in scope. Replaced with the zero-argument ``super().__init__()``.

    Args:
        input_dim: size of the observation vector.
        act_size: dimensionality of the action vector.
    """

    def __init__(self, input_dim, act_size):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)        # shared base layer
        self.actor_fc = nn.Linear(128, 128)         # actor hidden layer
        self.actor_out = nn.Linear(128, act_size)   # policy mean output
        # Learnable, state-independent std for the Gaussian policy.
        self.std = nn.Parameter(torch.ones(1, act_size))
        self.critic_fc = nn.Linear(128, 128)        # critic hidden layer
        self.critic_out = nn.Linear(128, 1)         # state-value output

    def forward(self, state):
        """Return (clamped sampled action, its log-prob, state value).

        The log-prob is taken before clamping, matching the original behavior.
        """
        x = F.relu(self.fc1(state))
        mean = self.actor_out(F.relu(self.actor_fc(x)))
        dist = torch.distributions.Normal(mean, self.std)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        value = self.critic_out(F.relu(self.critic_fc(x)))
        return torch.clamp(action, -1, 1), log_prob, value
```

I created the ModuleList version to make it easier to play around with the number of hidden layers and to change them without rewriting the class. However, it performs very badly compared to the Sequential-style implementation.

I have tried different seeds but the ModuleList version has had no luck. This really makes me wonder what I have done wrong. I hope someone can help me find the root cause of this discrepancy so I won't make the same mistake again. Cheers!

I have been trying to figure this thing out for days really appreciate if someone can help me!

Cheers!