Hey there,
I implemented a four-player card game using PyTorch and reinforcement learning (PPO). To train the agents, I make four exact copies of the network and let them play against each other. After a certain number of updates, I would now like to share the weights between these networks.
I found this procedure (I think it is for Lua Torch rather than PyTorch):

1. Make all your modules.
2. Make all your clones.
3. Add all the modules and clones to a single nn.Container.
4. Call :getParameters on the nn.Container to get params and grads. This will preserve any sharing of parameters between modules inside the nn.Container.

Now using the modules and clones as normal will play nice with optim, because all of the params and grads reference the same storage as the tensors from :getParameters.
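
If I understand the idea correctly, the PyTorch analogue might simply be to let all four players hold a reference to one and the same network object, so the parameters are shared automatically (just a sketch with placeholder dimensions and a made-up lr, using the ActorCritic class shown further down):

import torch

shared = ActorCritic(240, 60, 64)  # placeholder dimensions
players = [shared] * 4             # all four entries reference the same object
optimizer = torch.optim.Adam(shared.parameters(), lr=0.002)  # one optimizer step updates every "copy"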
Still, I tried to implement the quoted procedure as literally as possible (ppo is the list of the four models):
container = nn.Container()   # does nn.Container even exist in PyTorch, or is it Lua Torch only?
for i in range(4):
    container.add(ppo[i])    # the original procedure uses Lua syntax: container:add(...)
params = container.parameters()
How do I now apply the parameters back to each model? And is the above method even the correct approach in PyTorch?
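
In case it helps to see what I am after, this is roughly the synchronisation I would write naively (sync_weights is just a name I made up, and broadcasting player 0's weights to everyone is only one possible interpretation of "sharing"):

def sync_weights(agents):
    # naive idea: copy player 0's weights into the other three players
    source = agents[0].policy.state_dict()
    for agent in agents[1:]:
        agent.policy.load_state_dict(source)
        agent.policy_old.load_state_dict(source)

# called every now and then during training:
# sync_weights(ppo)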
Further snippets for context:

import torch
import torch.nn as nn

class PPO:
    def __init__(self, state_dim, action_dim, n_latent_var, lr, betas, gamma, K_epochs, eps_clip, lr_decay=1000000):
        self.lr = lr
        self.betas = betas
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs
        self.policy = ActorCritic(state_dim, action_dim, n_latent_var)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr, betas=betas, eps=1e-5)  # no eps before!
        self.policy_old = ActorCritic(state_dim, action_dim, n_latent_var)
        self.policy_old.load_state_dict(self.policy.state_dict())
        # to decay the learning rate during training:
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=lr_decay, gamma=0.9)
        self.MseLoss = nn.MSELoss()
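
For completeness, the four copies are then created roughly like this (the concrete hyperparameter values here are placeholders, not my real settings):

state_dim, action_dim, n_latent_var = 240, 60, 64  # placeholders
ppo = [PPO(state_dim, action_dim, n_latent_var,
           lr=0.002, betas=(0.9, 0.999), gamma=0.99,
           K_epochs=4, eps_clip=0.2)
       for _ in range(4)]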
class ActorMod(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorMod, self).__init__()
        self.l1 = nn.Linear(state_dim, n_latent_var)
        self.l1_tanh = nn.PReLU()  # name is historic, the activation is actually PReLU
        self.l2 = nn.Linear(n_latent_var, n_latent_var)
        self.l2_tanh = nn.PReLU()
        self.l3 = nn.Linear(n_latent_var + 60, action_dim)

    def forward(self, input):
        x = self.l1(input)
        x = self.l1_tanh(x)
        x = self.l2(x)
        out1 = self.l2_tanh(x)  # shape (n_latent_var,) or (batch, n_latent_var)
        if len(input.shape) == 1:  # single, unbatched state
            out2 = input[180:240]  # these 60 entries are the available options of the active player
            output = torch.cat([out1, out2], 0)
        else:  # batched states
            out2 = input[:, 180:240]
            output = torch.cat([out1, out2], 1)
        x = self.l3(output)
        return x.softmax(dim=-1)  # dim=-1 instead of dim=1, so the unbatched branch works too
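
A quick shape check of the actor, the way I call it (toy values; state_dim=240 matches the input[..., 180:240] slice, the rest is made up):

net = ActorMod(state_dim=240, action_dim=60, n_latent_var=64)
print(net(torch.rand(240)).shape)     # unbatched state -> torch.Size([60])
print(net(torch.rand(8, 240)).shape)  # batch of states -> torch.Size([8, 60])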
class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorCritic, self).__init__()
        # actor
        # TODO see question: https://discuss.pytorch.org/t/pytorchmultipleinputsinsequential/74040
        self.action_layer = ActorMod(state_dim, action_dim, n_latent_var)
        # critic
        self.value_layer = nn.Sequential(
            nn.Linear(state_dim, n_latent_var),
            nn.PReLU(),
            nn.Linear(n_latent_var, n_latent_var),
            nn.PReLU(),
            nn.Linear(n_latent_var, 1)
        )