This is the implementation of the forward methods of the TransformerEncoder and TransformerDecoder modules, and I can't see any positional encoding in either of them. Can anyone clarify how the encoder/decoder inputs get their positional information? Any help would be much appreciated.
forward method of TransformerDecoder:

def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
            memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
            memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:
    output = tgt

    for mod in self.layers:
        output = mod(output, memory, tgt_mask=tgt_mask,
                     memory_mask=memory_mask,
                     tgt_key_padding_mask=tgt_key_padding_mask,
                     memory_key_padding_mask=memory_key_padding_mask)

    if self.norm is not None:
        output = self.norm(output)

    return output
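For context: forward starts from output = tgt and never adds any position information itself, so tgt and memory presumably have to carry it already by the time they are passed in. Here is a minimal sketch of how I am currently calling the decoder (the hyperparameters, shapes, and the hand-built causal mask are only for illustration, not taken from the PyTorch source):

import torch
import torch.nn as nn

d_model = 512
decoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=8)
decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)

memory = torch.rand(10, 32, d_model)   # encoder output: (src_len, batch, d_model)
tgt = torch.rand(20, 32, d_model)      # target embeddings: (tgt_len, batch, d_model)

# Standard causal mask: -inf above the diagonal blocks attention to future positions.
tgt_len = tgt.size(0)
tgt_mask = torch.triu(torch.full((tgt_len, tgt_len), float('-inf')), diagonal=1)

out = decoder(tgt, memory, tgt_mask=tgt_mask)  # note: no positional encoding happens in here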
forward method of TransformerEncoder:

def forward(self, src: Tensor, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
    output = src
    convert_to_nested = False
    first_layer = self.layers[0]

    # Fast-path check: an inference-time optimization that packs padded
    # sequences into a nested tensor so that padding positions are skipped.
    if isinstance(first_layer, torch.nn.TransformerEncoderLayer):
        if (not first_layer.norm_first and not first_layer.training and
                first_layer.self_attn.batch_first and
                first_layer.self_attn._qkv_same_embed_dim and first_layer.activation_relu_or_gelu and
                first_layer.norm1.eps == first_layer.norm2.eps and
                src.dim() == 3 and self.enable_nested_tensor):
            if src_key_padding_mask is not None and not output.is_nested and mask is None:
                tensor_args = (
                    src,
                    first_layer.self_attn.in_proj_weight,
                    first_layer.self_attn.in_proj_bias,
                    first_layer.self_attn.out_proj.weight,
                    first_layer.self_attn.out_proj.bias,
                    first_layer.norm1.weight,
                    first_layer.norm1.bias,
                    first_layer.norm2.weight,
                    first_layer.norm2.bias,
                    first_layer.linear1.weight,
                    first_layer.linear1.bias,
                    first_layer.linear2.weight,
                    first_layer.linear2.bias,
                )
                if not torch.overrides.has_torch_function(tensor_args):
                    if not torch.is_grad_enabled() or all([not x.requires_grad for x in tensor_args]):
                        if output.is_cuda or 'cpu' in str(output.device):
                            convert_to_nested = True
                            output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not())

    # The layer stack itself: src is consumed as-is, with no positional
    # encoding applied anywhere in this method.
    for mod in self.layers:
        if convert_to_nested:
            output = mod(output, src_mask=mask)
        else:
            output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask)

    if convert_to_nested:
        output = output.to_padded_tensor(0.)

    if self.norm is not None:
        output = self.norm(output)

    return output
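My current guess is that the positional encoding is expected to be added by the caller, to the embeddings, before forward is ever invoked; is that correct? Below is a minimal sketch of what I mean, assuming the fixed sinusoidal scheme from "Attention Is All You Need". The PositionalEncoding helper is my own (modeled on the PyTorch seq2seq tutorial), not part of torch.nn:

import math
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    # Adds fixed sinusoidal position information to a (seq_len, batch, d_model) tensor.
    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)                  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)                # even dimensions
        pe[:, 0, 1::2] = torch.cos(position * div_term)                # odd dimensions
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.pe[:x.size(0)]                                 # broadcasts over batch

d_model = 512
pos_encoder = PositionalEncoding(d_model)
encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=8)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

src = torch.rand(10, 32, d_model)      # token embeddings only, no position info yet
out = encoder(pos_encoder(src))        # position info is injected *before* forward runs

If that is right, it would explain why neither forward above ever mentions positions: the modules treat src/tgt as opaque feature vectors and leave the choice of encoding scheme (sinusoidal, learned, etc.) entirely up to the user.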