NaN output in action

I’ve encountered a serious issue: after about 500–1000 iterations, my actions become NaN (e.g., action = [nan, nan, nan, nan, nan, nan]).

The setup has three agents. This is the TensorDict I get:

```
TensorDict(
    fields={
        action: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        collector: TensorDict(
            fields={
                traj_ids: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True)},
            batch_size=torch.Size([]),
            device=cuda:0,
            is_shared=True),
        done: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.bool, is_shared=True),
        loc: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        observation: Tensor(shape=torch.Size([30]), device=cuda:0, dtype=torch.float32, is_shared=True),
        params: TensorDict(
            fields={
                default_x: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                default_y: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                max_movement: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True),
                memory_size: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
                rand_init: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.bool, is_shared=True),
                scale: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
                user_velocity: Tensor(shape=torch.Size([3]), device=cuda:0, dtype=torch.int64, is_shared=True)},
            batch_size=torch.Size([]),
            device=cuda:0,
            is_shared=True),
        sample_log_prob: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        scale: Tensor(shape=torch.Size([6]), device=cuda:0, dtype=torch.float32, is_shared=True),
        step_count: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.int64, is_shared=True),
        step_index: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.int64, is_shared=True),
        terminated: Tensor(shape=torch.Size([1]), device=cuda:0, dtype=torch.bool, is_shared=True),
        throughput_mean: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        throughput_std: Tensor(shape=torch.Size([]), device=cuda:0, dtype=torch.float32, is_shared=True),
        ue_position: Tensor(shape=torch.Size([30, 2]), device=cuda:0, dtype=torch.float32, is_shared=True)},
    batch_size=torch.Size([]),
    device=cuda:0,
    is_shared=True)
```