Hello!
I have two implementations of an nn.Module that, as far as I know, build the same model:
def layer_init(in_features, out_features, std=np.sqrt(2), bias_const=0.0):
    """Create an ``nn.Linear`` with orthogonal weights and a constant bias.

    Args:
        in_features: Input width of the linear layer.
        out_features: Output width of the linear layer.
        std: Gain handed to ``torch.nn.init.orthogonal_`` for the weight.
        bias_const: Value written into every bias entry.

    Returns:
        The initialised ``nn.Linear`` layer.
    """
    linear = nn.Linear(in_features, out_features)
    initializers = torch.nn.init
    # Weight first, then bias: same order of in-place initialisation calls.
    initializers.orthogonal_(linear.weight, gain=std)
    initializers.constant_(linear.bias, val=bias_const)
    return linear
class ActorCriticNet_1(nn.Module):
    """Actor-critic MLP whose actor and critic trunks use the same widths.

    Builds ``actor_net`` (Linear + Tanh trunk), two actor heads
    (``actor_mean`` and ``actor_std``) and ``critic_net`` (Linear + Tanh
    trunk followed by a single-output linear layer).
    """

    def __init__(self, obversation_shape, action_shape, layers):
        """Construct both networks.

        Args:
            obversation_shape: Shape of one observation; the product of its
                dimensions is the input width of both trunks.
            action_shape: Shape of one action; the product of its dimensions
                is the output width of each actor head.
            layers: Hidden-layer widths shared by the actor and critic
                trunks. May be empty, in which case the heads are attached
                directly to the input.
        """
        super().__init__()
        # np.prod(()) is the float 1.0, which nn.Linear rejects as a size,
        # so coerce to a plain int to support scalar (empty) shapes too.
        fc_layer_value = int(np.prod(obversation_shape))
        action_shape = int(np.prod(action_shape))

        self.actor_net = nn.Sequential()
        self.critic_net = nn.Sequential()
        # NOTE: actor and critic layers are created alternately here, whereas
        # ActorCriticNet_2 builds the whole actor before the critic. Random
        # initialisation therefore draws from the RNG in a different order,
        # so the same seed does not give the same initial weights.
        for layer_value in layers:
            self.actor_net.append(layer_init(fc_layer_value, layer_value))
            self.actor_net.append(nn.Tanh())
            self.critic_net.append(layer_init(fc_layer_value, layer_value))
            self.critic_net.append(nn.Tanh())
            fc_layer_value = layer_value

        # fc_layer_value now holds the trunk output width (or the input width
        # when `layers` is empty). Using it avoids referring to names that do
        # not exist in this class and avoids indexing an empty list.
        self.actor_mean = layer_init(fc_layer_value, action_shape, std=0.01)
        self.actor_std = layer_init(fc_layer_value, action_shape, std=0.01)
        # NOTE: the critic head uses gain 1.0 here; ActorCriticNet_2 uses 0.01.
        self.critic_net.append(layer_init(fc_layer_value, 1, std=1.0))
class ActorCriticNet_2(nn.Module):
    """Actor-critic MLP with independently sized actor and critic trunks.

    Builds ``actor_net`` (Linear + Tanh trunk), two actor heads
    (``actor_mean`` and ``actor_std``) and ``critic_net`` (Linear + Tanh
    trunk followed by a single-output linear layer).
    """

    def __init__(self, obversation_shape, action_shape, layers_actor, layers_critic):
        """Construct the actor first, then the critic.

        Args:
            obversation_shape: Shape of one observation; the product of its
                dimensions is the input width of both trunks.
            action_shape: Shape of one action; the product of its dimensions
                is the output width of each actor head.
            layers_actor: Hidden-layer widths of the actor trunk (may be empty).
            layers_critic: Hidden-layer widths of the critic trunk (may be empty).
        """
        super().__init__()
        # np.prod(()) is the float 1.0, which nn.Linear rejects as a size,
        # so coerce to a plain int to support scalar (empty) shapes too.
        action_shape = int(np.prod(action_shape))
        input_size = int(np.prod(obversation_shape))

        # Actor network
        self.actor_net = nn.Sequential()
        actor_input_size = input_size
        for layer_size in layers_actor:
            self.actor_net.append(layer_init(actor_input_size, layer_size))
            self.actor_net.append(nn.Tanh())
            actor_input_size = layer_size
        self.actor_mean = layer_init(actor_input_size, action_shape, std=0.01)
        self.actor_std = layer_init(actor_input_size, action_shape, std=0.01)

        # Critic network
        self.critic_net = nn.Sequential()
        critic_input_size = input_size
        for layer_size in layers_critic:
            self.critic_net.append(layer_init(critic_input_size, layer_size))
            self.critic_net.append(nn.Tanh())
            critic_input_size = layer_size
        # NOTE(review): the critic head uses gain 0.01 here, while
        # ActorCriticNet_1 uses 1.0 -- confirm which one is intended.
        self.critic_net.append(layer_init(critic_input_size, 1, std=0.01))
But when I test these two models, they don’t produce the same results despite using the same seed. What prompted me to post this message is that ActorCriticNet_2
tends to crash randomly during training. By crashing, I mean that my PC reboots on its own.
I had the same kind of crash with JAX, and since I’m running this model on the CPU for speed, I guess something is going wrong with my CPU, but I can’t understand why training differs between the two models.
And of course, printing both models outputs the same thing.