I’ve encountered a serious issue: after about 500–1000 iterations, my actions become NaN (e.g., action = [nan, nan, nan, nan, nan, nan]).
There are three agents. The TensorDict produced by the environment looks like this:
```
TensorDict(
    fields={
        action: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        collector: TensorDict(
            fields={
                traj_ids: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True)},
            batch_size=torch.Size([]),
            device=cuda:0,
            is_shared=True),
        done: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.bool, is_shared=True),
        loc: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        observation: Tensor(shape=torch.Size([30]), device=cuda:0, dtype=torch.float32, is_shared=True),
        params: TensorDict(
            fields={
                default_x: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                default_y: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                max_movement: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                memory_size: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
                rand_init: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.bool, is_shared=True),
                scale: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
                user_velocity: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True)},
            batch_size=torch.Size([]),
            device=cuda:0,
            is_shared=True),
        sample_log_prob: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        scale: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        step_count: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.int64, is_shared=True),
        step_index: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
        terminated: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.bool, is_shared=True),
        throughput_mean: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        throughput_std: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        ue_position: Tensor(shape=torch.Size([30, 2]), device=cuda:0, dtype=torch.float32, is_shared=True)},
    batch_size=torch.Size([]),
    device=cuda:0,
    is_shared=True)
```
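
To narrow down where the NaNs first appear, this is the kind of sanity check I have started adding (a minimal sketch; the helper name `assert_finite` is just for illustration, and the keys are the ones from the dump above):

```python
import torch

def assert_finite(td, keys=("loc", "scale", "action", "observation")):
    # Hypothetical debugging helper: raise as soon as any of these entries
    # contains NaN/Inf, so I can see which one goes bad first.
    for key in keys:
        if key in td.keys():
            tensor = td.get(key)
            if not torch.isfinite(tensor).all():
                raise RuntimeError(f"Non-finite values in '{key}':\n{tensor}")
```

I call this on each tensordict coming out of the collector (and again right after the loss/backward step) to see whether the NaNs show up first in the network outputs (`loc`/`scale`) or only in the sampled `action`.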