Same model implementation but different results

Hello!

I have two implementations of an nn.Module that, as far as I know, build the same model:

import numpy as np
import torch
import torch.nn as nn


def layer_init(in_features, out_features, std=np.sqrt(2), bias_const=0.0):
    # Orthogonal weight init with the given gain, constant bias init
    layer = nn.Linear(in_features, out_features)
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class ActorCriticNet_1(nn.Module):
    def __init__(self, observation_shape, action_shape, layers):
        super().__init__()

        fc_layer_value = np.prod(observation_shape)
        action_shape = np.prod(action_shape)

        self.actor_net = nn.Sequential()
        self.critic_net = nn.Sequential()

        # Actor and critic hidden layers are built in an interleaved order
        for layer_value in layers:
            self.actor_net.append(layer_init(fc_layer_value, layer_value))
            self.actor_net.append(nn.Tanh())

            self.critic_net.append(layer_init(fc_layer_value, layer_value))
            self.critic_net.append(nn.Tanh())

            fc_layer_value = layer_value

        self.actor_mean = layer_init(layers[-1], action_shape, std=0.01)
        self.actor_std = layer_init(layers[-1], action_shape, std=0.01)

        self.critic_net.append(layer_init(layers[-1], 1, std=1.0))


class ActorCriticNet_2(nn.Module):
    def __init__(self, observation_shape, action_shape, layers_actor, layers_critic):
        super().__init__()

        action_shape = np.prod(action_shape)

        # Actor network: all actor layers are built first
        self.actor_net = nn.Sequential()
        actor_input_size = np.prod(observation_shape)
        for layer_size in layers_actor:
            self.actor_net.append(layer_init(actor_input_size, layer_size))
            self.actor_net.append(nn.Tanh())
            actor_input_size = layer_size

        self.actor_mean = layer_init(actor_input_size, action_shape, std=0.01)
        self.actor_std = layer_init(actor_input_size, action_shape, std=0.01)

        # Critic network: built only after the complete actor network
        self.critic_net = nn.Sequential()
        critic_input_size = np.prod(observation_shape)
        for layer_size in layers_critic:
            self.critic_net.append(layer_init(critic_input_size, layer_size))
            self.critic_net.append(nn.Tanh())
            critic_input_size = layer_size

        self.critic_net.append(layer_init(critic_input_size, 1, std=1.0))

but when I test these two models, they don’t produce the same results despite using the same seed. What prompted me to post is that ActorCriticNet_2 tends to crash randomly during training. By crashing I mean that my PC automatically reboots.

I had the same kind of crash with JAX, and since I’m running this model on the CPU for speed, I suspect something is going wrong with my CPU, but I can’t understand why there are differences in training between the two models.

And of course, printing both models outputs the same thing.
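For reference, print(model) only shows the architecture, not the parameter values. Here is a sketch of how the actual weights can be compared (the observation/action shapes and layer sizes are just placeholders):

torch.manual_seed(0)
model_1 = ActorCriticNet_1((4,), (2,), [64, 64])
torch.manual_seed(0)
model_2 = ActorCriticNet_2((4,), (2,), [64, 64], [64, 64])

# Compare every tensor by name; the biases are all initialized to a
# constant, so only the weights can reveal a difference.
sd_1, sd_2 = model_1.state_dict(), model_2.state_dict()
for name in sd_1:
    print(name, torch.allclose(sd_1[name], sd_2[name]))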

The order of calls to the pseudorandom number generator is different and thus the initialized models will not use the same values even if you seed the code at the beginning of the script.
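A minimal illustration of this effect with two plain nn.Linear layers:

torch.manual_seed(0)
a = nn.Linear(4, 8)   # consumes the first chunk of the RNG stream
b = nn.Linear(4, 8)   # consumes the second chunk

torch.manual_seed(0)
c = nn.Linear(4, 8)   # consumes the first chunk again

print(torch.allclose(a.weight, c.weight))  # True: same RNG position
print(torch.allclose(b.weight, c.weight))  # False: different RNG position

In your code, ActorCriticNet_1 initializes the actor and critic hidden layers in an interleaved order, while ActorCriticNet_2 builds the whole actor first, so after the first layer the two models draw different numbers from the RNG.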
If you want to load the same values from one model into the other, use a state_dict.
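For example (this assumes both models are built with the same hidden layer sizes, so the parameter names and shapes line up):

model_1 = ActorCriticNet_1((4,), (2,), [64, 64])
model_2 = ActorCriticNet_2((4,), (2,), [64, 64], [64, 64])

# Both modules register their parameters under the same names,
# so the weights can be copied over directly.
model_2.load_state_dict(model_1.state_dict())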
I don’t know why your training crashes or what exactly fails; I would need more information to try to help out.