Yes, here it is. The class names (`Encoder`, `EncoderBlock`, `MLPBlock`) and the 768/3072 dimensions match torchvision's ViT-B/16: the first print shows the patch-embedding convolution together with the input shape, and the second shows the full encoder with the shape after patch embedding appended to its closing parenthesis.
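For reference, a dump in this format can be reproduced with something like the snippet below. That the model is `vit_b_16` is my inference from the printed structure, not stated in the dump itself, and the random input only matters for its shape:

```python
import torch
from torchvision.models import vit_b_16

model = vit_b_16()                 # assumed source of the dump (structure matches ViT-B/16)
x = torch.randn(50, 3, 224, 224)   # batch of 50 RGB images at 224x224

# print(module, tensor.shape) places the shape on the same line as the
# last ")" of the module repr, which is exactly what the dump shows.
print(model.conv_proj, x.shape)
print(model.encoder, model.conv_proj(x).shape)
```

The first print gives: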
```
Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16)) torch.Size([50, 3, 224, 224])
```
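This is the patch embedding: a 16×16 convolution applied with stride 16, so each of the 14 × 14 = 196 non-overlapping patches of a 224×224 image becomes one 768-dimensional vector. A minimal shape check (variable names here are illustrative):

```python
import torch
from torch import nn

conv_proj = nn.Conv2d(3, 768, kernel_size=16, stride=16)

x = torch.randn(50, 3, 224, 224)
feats = conv_proj(x)               # 224 / 16 = 14 -> torch.Size([50, 768, 14, 14])

# The transformer encoder consumes a token sequence, so the 14x14 grid is
# flattened into 196 patch tokens of width 768; torchvision then prepends
# a class token and adds positional embeddings before the Encoder below.
tokens = feats.flatten(2).transpose(1, 2)
print(feats.shape, tokens.shape)   # [50, 768, 14, 14] and [50, 196, 768]
```

The second print gives the encoder itself: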
```
Encoder(
  (dropout): Dropout(p=0.0, inplace=False)
  (layers): Sequential(
    (encoder_layer_0): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_1): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_2): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_3): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_4): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_5): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_6): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_7): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_8): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_9): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_10): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (encoder_layer_11): EncoderBlock(
      (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (self_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLPBlock(
        (linear_1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (linear_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (ln): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
) torch.Size([50, 768, 14, 14])
```
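Two things worth noting about the dump. First, the trailing `torch.Size([50, 768, 14, 14])` is the shape after `conv_proj`, not the encoder's output; the encoder runs on `[50, 197, 768]` token sequences (196 patches plus 1 class token) and preserves that shape. Second, each of the 12 `EncoderBlock`s is a standard pre-LayerNorm transformer block: attention and MLP sub-layers, each wrapped in a residual connection. Here is a minimal sketch of what one block computes, mirroring the printed submodules (the 12 attention heads come from the ViT-B config and are not visible in the repr):

```python
import torch
from torch import nn

class EncoderBlockSketch(nn.Module):
    """Pre-LN transformer block mirroring the printed EncoderBlock."""

    def __init__(self, dim=768, num_heads=12, mlp_dim=3072, dropout=0.0):
        super().__init__()
        self.ln_1 = nn.LayerNorm(dim, eps=1e-6)
        self.self_attention = nn.MultiheadAttention(
            dim, num_heads, dropout=dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.ln_2 = nn.LayerNorm(dim, eps=1e-6)
        self.mlp = nn.Sequential(          # MLPBlock: 768 -> 3072 -> GELU -> 768
            nn.Linear(dim, mlp_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        # Self-attention sub-layer with a residual connection (pre-LN).
        y = self.ln_1(x)
        y, _ = self.self_attention(y, y, y, need_weights=False)
        x = x + self.dropout(y)
        # MLP sub-layer with a residual connection.
        return x + self.mlp(self.ln_2(x))

tokens = torch.randn(50, 197, 768)          # 196 patch tokens + 1 class token
print(EncoderBlockSketch()(tokens).shape)   # torch.Size([50, 197, 768])
```

The final `(ln)` in the dump is one more LayerNorm applied after the last block, the usual closing step of a pre-LN transformer stack.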