```
class DeepR_v1(nn.Module):
def __init__(self, input_features, output_features, rest_pose, parent_indices, device):
super(DeepR_v1, self).__init__()
self.input_features = input_features
self.output_features = output_features
self.rest_pose = rest_pose
self.parent_indices = parent_indices
self.device = device
self.converter = nn.Sequential(
nn.Linear(input_features, 512),
nn.BatchNorm1d(512),
nn.ReLU(), # ReLU activation
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(), # ReLU activation
nn.Linear(256, 128),
nn.BatchNorm1d(128),
nn.ReLU(), # ReLU activation
nn.Linear(128, output_features),
nn.Tanh() # Tanh activation
)
def axis_angle_to_quaternion(self, axis_angle: torch.Tensor) -> torch.Tensor:...
def quaternion_to_matrix(self, quaternions: torch.Tensor) -> torch.Tensor:...
def axis_angle_to_matrix(self, axis_angle: torch.Tensor) -> torch.Tensor:...
def make_4x4_transforms(self, rot, pelv_pos):...
def fk(self, rest_rel_local_transforms):
"""
Compute the global transforms for multiple frames given the rest-relative local transforms,
rest pose, and parent indices for each bone.
Args:
rest_rel_local_transforms (torch.Tensor): The rest-relative local transforms with shape (num_frames, num_bones, 4, 4).
rest_pose (torch.Tensor): The rest pose transform with shape (num_bones, 4, 4).
parent_indices (torch.Tensor): The parent indices for each bone with shape (num_bones).
Returns:
torch.Tensor: The global transforms with shape (num_frames, num_bones, 4, 4).
"""
# Get the number of frames and bones from the shape of the input transforms
num_frames, num_bones, _, _ = rest_rel_local_transforms.shape
# Initialize the global transforms tensor with the same shape as the input transforms
global_transforms = torch.zeros_like(rest_rel_local_transforms)
# Compute the local transforms for all frames by multiplying the rest pose with the rest-relative local transforms
local_transforms = self.rest_pose.unsqueeze(0).repeat(num_frames, 1, 1, 1) @ rest_rel_local_transforms
# Initialize the global transform for the first bone (assuming it has no parent)
global_transforms[:, 0] = local_transforms[:, 0] # Assuming the first bone has no parent (parent_indices[0] == -1)
# Use a loop to compute global transforms for the remaining bones for all frames
for bone_idx in range(1, num_bones):
# Get the parent index for the current bone
parent_idx = self.parent_indices[bone_idx]
# Compute the global transform for the current bone by multiplying the parent's global transform with the current local transform
t = global_transforms[:, parent_idx] @ local_transforms[:, bone_idx]
global_transforms[:, bone_idx] = t
return global_transforms
def forward(self, x):
y = self.converter(x)
r = y[:, :-3]
rot = r.reshape(r.shape[0], r.shape[1]//3, 3)
pelv_pos = y[:, -3:]
r_mat = self.axis_angle_to_matrix(rot)
rest_rel_local_transforms = self.make_4x4_transforms(r_mat, pelv_pos).to(self.device)
global_transforms = self.fk(rest_rel_local_transforms).to(self.device)
pos = global_transforms[:, :, :3, 3]
return rot, pelv_pos, pos
```

when I train this model I get a runtime error:

`RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [32, 4, 4]], which is output 0 of AsStridedBackward0, is at version 26; expected version 25 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!`

using torch.autograd.set_detect_anomaly(True) prints the following:

`File "C:\Users\mayur\AppData\Local\Temp\ipykernel_7976\2772885769.py", line 168, in fk t = global_transforms[:, parent_idx] @ local_transforms[:, bone_idx] (Triggered internally at [C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:116](file:///C:/cb/pytorch_1000000000000/work/torch/csrc/autograd/python_anomaly_mode.cpp:116).) return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass.`

Why is this happening?