Hi Frank,
Thank you for the reply.
I’m running a real-time-chunking (RTC) algorithm from the Physical Intelligence team, which requires gradient computation during inference.
The difference between training and inference is that instead of using the forward method to run the forward pass, I wrap some operations into a denoise_step function to be compatible with RTC.
Below is the output:
Error in server: element 0 of tensors does not require grad and does not have a grad_fn
Traceback (most recent call last):
File “/home/u4090/Isaac-GR00T/gr00t/policy/server_client.py”, line 132, in run
handler.handler(**request.get(“data”, {}))
File “/home/u4090/Isaac-GR00T/gr00t/policy/policy.py”, line 87, in get_action
action, info = self._get_action(observation, options)
File “/home/u4090/Isaac-GR00T/gr00t/policy/gr00t_rtc_policy.py”, line 355, in _get_action
model_pred = self.model.get_rtc_action(**collated_inputs,inference_delay=inference_delay, prev_chunk_left_over=prev_chunk_left_over) # type: ignore
File “/home/u4090/Isaac-GR00T/gr00t/model/gr00t_n1d6/gr00t_n1d6.py”, line 673, in get_rtc_action
action_outputs = self.action_head.get_action(backbone_outputs, action_inputs, use_rtc=True, **kwargs)
File “/home/u4090/Isaac-GR00T/gr00t/model/gr00t_n1d6/gr00t_n1d6.py”, line 518, in get_action
return self.get_action_with_features(
File “/home/u4090/Isaac-GR00T/gr00t/model/gr00t_n1d6/gr00t_n1d6.py”, line 435, in get_action_with_features
pred_velocity = self.rtc_processor.denoise_step(
File “/home/u4090/Isaac-GR00T/gr00t/model/gr00t_n1d6/rtc_utils/rtc_modeling.py”, line 232, in denoise_step
correction = torch.autograd.grad(x1_t, x_t, grad_outputs, retain_graph=False)[0]
File “/home/u4090/miniconda3/envs/groot/lib/python3.10/site-packages/torch/autograd/init.py”, line 502, in grad
result = _engine_run_backward(
File “/home/u4090/miniconda3/envs/groot/lib/python3.10/site-packages/torch/autograd/graph.py”, line 824, in _engine_run_backward
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I tried to run the simple script below, which reproduces the first step of the forward pass, but it behaves differently (requires_grad is True). So I assume it is an issue with some PyTorch setting, but I cannot identify which. Do you have any clue what could cause requires_grad to be False, even though torch.is_grad_enabled() returns True?
import torch
import torch.nn as nn

# Select GPU when available, otherwise fall back to CPU.
# (Fixed: the pasted version used typographic quotes around "cuda"/"cpu",
# which are a Python syntax error.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CategorySpecificLinear(nn.Module):
    """Per-category linear projection: every embodiment/category owns its
    own weight matrix and bias, selected at run time by ``cat_ids``."""

    def __init__(self, num_categories, input_dim, hidden_dim):
        super().__init__()
        self.num_categories = num_categories
        # One independent (input_dim x hidden_dim) weight and hidden_dim
        # bias per category, stacked along dim 0.
        self.W = nn.Parameter(0.02 * torch.randn(num_categories, input_dim, hidden_dim))
        self.b = nn.Parameter(torch.zeros(num_categories, hidden_dim))

    def forward(self, x, cat_ids):
        """
        Args:
            x: [B, T, input_dim] input tensor
            cat_ids: [B] category/embodiment IDs

        Returns:
            [B, T, hidden_dim] output tensor
        """
        w = self.W[cat_ids]     # [B, input_dim, hidden_dim]
        bias = self.b[cat_ids]  # [B, hidden_dim]
        # Diagnostic left in deliberately: shows whether autograd is
        # tracking the indexed (non-leaf) view of the parameter.
        print("selected_W.is_leaf:", w.is_leaf, "selected_W.grad_fn:", w.grad_fn, "selected_W.requires_grad:", w.requires_grad)
        return torch.bmm(x, w) + bias.unsqueeze(1)
class MultiEmbodimentActionEncoder(nn.Module):
    """Action encoder with multi-embodiment support.

    Reduced repro: only the first category-specific projection (W1) of the
    original encoder is kept.
    """

    def __init__(self, action_dim, hidden_size, num_embodiments):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_embodiments = num_embodiments
        # W1: R^{w x d}, W2: R^{w x 2w}, W3: R^{w x w}
        self.W1 = CategorySpecificLinear(num_embodiments, action_dim, hidden_size)  # (d -> w)

    def forward(self, actions, timesteps, cat_ids):
        """
        Args:
            actions: [B, T, action_dim] action tensor
            timesteps: [B,] a single scalar timestep per batch item
            cat_ids: [B,] category/embodiment IDs

        Returns:
            [B, T, hidden_size] encoded action features

        Raises:
            ValueError: if ``timesteps`` is not shaped (B,).
        """
        B, T, _ = actions.shape
        # Guard clause: reject anything that is not one scalar per batch item.
        if timesteps.dim() != 1 or timesteps.shape[0] != B:
            raise ValueError(
                "Expected `timesteps` to have shape (B,) so we can replicate across T."
            )
        # Replicate each scalar tau across all T steps: (B,) -> (B, T).
        # NOTE(review): the expanded timesteps are never consumed in this
        # reduced repro — kept only to mirror the original control flow.
        timesteps = timesteps.unsqueeze(1).expand(-1, T)
        # Standard action MLP step: (B, T, action_dim) -> (B, T, w).
        return self.W1(actions, cat_ids)
class Model(nn.Module):
    """Toy wrapper pairing the encoder with the RTC correction helper."""

    def __init__(self):
        super().__init__()
        self.model = MultiEmbodimentActionEncoder(action_dim=2, hidden_size=2, num_embodiments=3)
        self.rtc = RTC()

    def denoise_step(
        self,
        x_t: torch.Tensor,
        timestep: torch.Tensor,
        device: torch.device,
        cat_ids: torch.Tensor,
    ) -> torch.Tensor:
        """Simplified analog of ``Gr00tN1d6ActionHead.denoise_step``.

        Reproduces the gradient-handling pattern of the real implementation
        (``torch.enable_grad()`` plus forcing ``requires_grad`` on the
        timestep) while exercising only the small encoder above.
        """
        with torch.enable_grad():
            # In-place: the caller's timestep tensor is marked as requiring grad.
            timestep.requires_grad_(True)
            return self.model(x_t, timestep, cat_ids)

    @torch.no_grad()
    def get_action(self, x: torch.Tensor, timesteps: torch.Tensor, cat_ids: torch.Tensor):
        """Run one RTC-corrected step; grad is re-enabled inside RTC."""

        def denoise_step_partial(x_t):
            # `device` resolves to the module-level global, not an argument.
            return self.denoise_step(x_t, timesteps, device, cat_ids)

        return self.rtc.denoise_step(x, denoise_step_partial)
class RTC:
    """Minimal stand-in for the real-time-chunking correction helper."""

    def __init__(self):
        pass

    def denoise_step(self, x_t: torch.Tensor, original_denoise_step_partial):
        """Run the wrapped denoise step on a fresh leaf copy of ``x_t`` and
        return the vector-Jacobian product of the predicted clean sample
        with respect to that leaf, seeded with the (detached) error itself.
        """
        with torch.enable_grad():
            # Detached leaf copy so autograd tracks only this step.
            leaf = x_t.detach().clone().requires_grad_(True)
            velocity = original_denoise_step_partial(leaf)
            x1_t = leaf - velocity
            # Seed for the VJP: a constant (detached) copy of the error.
            grad_seed = x1_t.detach().clone()
            return torch.autograd.grad(x1_t, leaf, grad_seed, retain_graph=False)[0]
NN = Model()
NN.to(device)

# [B, T, action_dim] noisy action chunk; a leaf with requires_grad so the
# RTC correction can differentiate with respect to it.
x = torch.randn(1, 5, 2, requires_grad=True, device=device)
# [B,] embodiment/category IDs used to index the per-category weights.
# NOTE(review): this line arrived truncated in the email ("e=torch.long)");
# reconstructed as a single category id 0 — confirm against the original.
cat_ids = torch.tensor([0], device=device, dtype=torch.long)
timesteps = torch.tensor([1.0], dtype=torch.float32).to(device)  # [B,]

NN.eval()
with torch.enable_grad():
    output = NN.get_action(x, timesteps, cat_ids)
Best,
Boyuan Su