Here is the code:
import torch
from tensordict import TensorDict
from tensordict.nn import TensorDictModule
from torch import nn
from torchrl.envs import Compose, ToTensorImage, TransformedEnv
from torchrl.envs.libs.gym import GymEnv
from torchrl.envs.utils import check_env_specs
from torchrl.modules import OneHotCategorical, ProbabilisticActor, ValueOperator
from tqdm import tqdm
device = "cpu" if not torch.cuda.is_available() else "cuda:0"
num_cells = 256
lr = 3e-4
max_grad_norm = 1.0
frame_skip = 1
frames_per_batch = 1000 // frame_skip
# For a complete training, bring the number of frames up to 1M
total_frames = 10_000 // frame_skip
sub_batch_size = 64 # cardinality of the sub-samples gathered from the current data in the inner loop
num_epochs = 10 # optimisation steps per batch of data collected
clip_epsilon = 0.2  # clip value for PPO loss: see the equation in the intro for more context.
gamma = 0.99
lmbda = 0.95
entropy_eps = 1e-4
base_env = GymEnv(
    "CarRacing-v2",
    device=device,
    frame_skip=frame_skip,
    continuous=False,
    from_pixels=True,
    pixels_only=True,
)
transform = Compose(ToTensorImage(in_keys=["pixels"]))
env = TransformedEnv(base_env, transform)
n_actions = env.action_spec.space.n
print("observation_spec:", env.observation_spec)
print("reward_spec:", env.reward_spec)
print("done_spec:", env.done_spec)
print("action_spec:", env.action_spec)
check_env_specs(env)
rollout = env.rollout(10)
print("rollout of three steps:", rollout)
print("Shape of the rollout TensorDict:", rollout.batch_size)
actor_net = nn.Sequential(
nn.Linear(96, num_cells, device=device),
nn.Sigmoid(),
nn.Linear(num_cells, num_cells, device=device),
nn.Sigmoid(),
nn.Linear(num_cells, num_cells, device=device),
nn.Sigmoid(),
nn.Linear(num_cells, n_actions, device=device),
nn.Softmax(dim=-1),
)
policy_module = TensorDictModule(
actor_net, in_keys=["pixels"], out_keys=["action"]
)
policy_module = ProbabilisticActor(
module=policy_module,
spec=env.action_spec,
in_keys=["action"],
distribution_class=OneHotCategorical,
return_log_prob=True
)
value_net = nn.Sequential(
nn.LazyLinear(num_cells, device=device),
nn.Sigmoid(),
nn.LazyLinear(num_cells, device=device),
nn.Sigmoid(),
nn.LazyLinear(num_cells, device=device),
nn.Sigmoid(),
nn.LazyLinear(1, device=device),
)
value_module = ValueOperator(
module=value_net,
in_keys=["pixels"],
)
policy_module(env.reset())
Now, when I call policy_module(env.reset()) in the last line, I get the following error:
TypeError: distribution keywords and tensordict keys indicated by ProbabilisticTensorDictModule.in_keys must match.Got this error message:
__init__() got an unexpected keyword argument 'action'
with in_keys={'action': 'action'}
But to my limited understanding, shouldn't the ProbabilisticActor module take actions as input and output the log-probabilities of those actions? This is a discrete environment. How can I resolve this?
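For what it's worth, constructing the distribution directly seems to confirm that its __init__ only understands logits (or probs), not action. This is just a diagnostic sketch on my part, assuming 5 discrete actions as for CarRacing-v2 with continuous=False:

import torch
from torchrl.modules import OneHotCategorical

# Standalone check: the distribution builds fine from `logits`,
# whereas OneHotCategorical(action=...) raises the same TypeError as above.
dist = OneHotCategorical(logits=torch.zeros(5))  # 5 discrete CarRacing actions
action = dist.sample()        # one-hot encoded action of shape (5,)
print(action)
print(dist.log_prob(action))  # log-probability of the sampled action

So it looks like whatever key I hand to ProbabilisticActor via in_keys gets forwarded as a keyword to the distribution's constructor, but I am not sure what the correct wiring is supposed to be.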