I am using the PyTorchVideo library to work with the pretrained r2plus1d (R(2+1)D) model. My goal is to extract features from different layers of this model. To that end, I extracted the features from different stages (I call the extracted features stage_1, stage_2, and stage_3 in the script):
# Create each stage for R(2+1)D.
# Build the residual stages one after another. After each of the first three
# stages, snapshot everything accumulated in `blocks` so far into its own Net
# (stage_1, stage_2, stage_3) so intermediate features can be read out.
for idx, depth in enumerate(stage_depths):
    stage_dim_inner = stage_dim_out // 4

    # conv_b carries the stride for this stage: (temporal, height, width).
    stage_conv_b_stride = (
        stage_temporal_stride[idx],
        stage_spatial_stride[idx],
        stage_spatial_stride[idx],
    )

    stage = create_res_stage(
        depth=depth,
        dim_in=stage_dim_in,
        dim_inner=stage_dim_inner,
        dim_out=stage_dim_out,
        bottleneck=stage_bottleneck[idx],
        conv_a_kernel_size=stage_conv_a_kernel_size[idx],
        conv_a_stride=[1, 1, 1],
        conv_a_padding=[size // 2 for size in stage_conv_a_kernel_size[idx]],
        conv_b_kernel_size=stage_conv_b_kernel_size[idx],
        conv_b_stride=stage_conv_b_stride,
        conv_b_padding=[size // 2 for size in stage_conv_b_kernel_size[idx]],
        conv_b_num_groups=stage_conv_b_num_groups[idx],
        conv_b_dilation=stage_conv_b_dilation[idx],
        norm=norm,
        activation=activation,
    )

    # Append each stage exactly once. Appending it a second time would make
    # every Net built from `blocks` run the same stage twice in a row, and the
    # second pass would receive `dim_out` channels where `dim_in` is expected.
    blocks.append(stage)

    # nn.ModuleList(blocks) is built from the list as it stands right now, so
    # each snapshot holds only the blocks appended up to and including `idx`.
    if idx == 0:
        stage_1 = Net(blocks=nn.ModuleList(blocks))
    elif idx == 1:
        stage_2 = Net(blocks=nn.ModuleList(blocks))
    elif idx == 2:
        stage_3 = Net(blocks=nn.ModuleList(blocks))

    # The next stage consumes this stage's output and doubles the width.
    stage_dim_in = stage_dim_out
    stage_dim_out = stage_dim_out * 2
return stage_1, stage_2, stage_3
The problem is that this code raises an error when I run it.
How can I call the extracted layers?
Update
class Net(nn.Module):
    """
    Build a general Net model with a list of blocks for video recognition.

    ::

                                         Input
                                           ↓
                                         Block 1
                                           ↓
                                           .
                                           .
                                           .
                                           ↓
                                         Block N
                                           ↓

    The ResNet builder can be found in `create_resnet`.
    """

    def __init__(self, *, blocks: nn.ModuleList) -> None:
        """
        Args:
            blocks (torch.nn.ModuleList): the list of block modules, applied
                in order by `forward`. Must not be None.
        """
        super().__init__()
        assert blocks is not None
        self.blocks = blocks
        # Runs on every construction, including when `blocks` is shared with
        # another Net instance.
        init_net_weights(self)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Feed `x` through every block in order and return the final output.

        Args:
            x (torch.Tensor): input passed to the first block.

        Returns:
            torch.Tensor: output of the last block, or `x` unchanged when
            `blocks` is empty.
        """
        for block in self.blocks:
            x = block(x)
        return x