Is it possible to initialize only one layer when multiple LayerNorm layers need to be used in PyTorch? For example, writing self.layernorm = nn.LayerNorm(d_model) once instead of self.layernorm1 = nn.LayerNorm(d_model) and self.layernorm2 = nn.LayerNorm(d_model).
I would also like to know which other layer types can be initialized just once and reused, and which cannot.
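To make the question concrete, here is a rough sketch of the two styles I am comparing (the class names and d_model value are just placeholders I made up for this example):

    import torch
    import torch.nn as nn

    class SharedNormBlock(nn.Module):
        """Hypothetical block that reuses a single LayerNorm instance."""
        def __init__(self, d_model=512):
            super().__init__()
            self.layernorm = nn.LayerNorm(d_model)   # one instance

        def forward(self, x):
            x = self.layernorm(x)
            # ... some computation in between ...
            return self.layernorm(x)                 # same weights applied again

    class SeparateNormBlock(nn.Module):
        """The usual style: one LayerNorm per call site."""
        def __init__(self, d_model=512):
            super().__init__()
            self.layernorm1 = nn.LayerNorm(d_model)
            self.layernorm2 = nn.LayerNorm(d_model)

        def forward(self, x):
            x = self.layernorm1(x)
            # ... some computation in between ...
            return self.layernorm2(x)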
Thanks for your answer, but I have a few more questions: do dropout and ReLU have learnable parameters?
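I tried to check this myself by listing each module's parameters (if that is the right way to check):

    import torch.nn as nn

    print(list(nn.Dropout(0.1).parameters()))   # [] -> no learnable parameters
    print(list(nn.ReLU().parameters()))         # [] -> no learnable parameters
    print([p.shape for p in nn.LayerNorm(512).parameters()])
    # [torch.Size([512]), torch.Size([512])] -> weight and bias are learnable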
For example, in the Transformer decoder-layer code below I have defined three dropout layers. If dropout doesn't have learnable parameters, wouldn't it be better to define just one? Why go to the trouble of defining three (a variant with a single shared dropout follows the code):
import torch.nn as nn

# Norm, MultiHeadAttention and FeedForward are the custom modules defined
# earlier in the same codebase.
class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)

    def forward(self, x, e_outputs, src_mask, trg_mask):
        # masked self-attention sub-layer
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
        # encoder-decoder attention sub-layer
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
        # feed-forward sub-layer
        x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x2))
        return x
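For comparison, this is the variant I have in mind, with a single shared nn.Dropout instance (assuming the same Norm, MultiHeadAttention and FeedForward modules as above, and assuming the reuse is safe because dropout keeps no state between calls):

    class DecoderLayerSharedDropout(nn.Module):
        # Hypothetical variant that reuses one dropout module at all three call sites.
        def __init__(self, d_model, heads, dropout=0.1):
            super().__init__()
            self.norm_1 = Norm(d_model)
            self.norm_2 = Norm(d_model)
            self.norm_3 = Norm(d_model)
            self.dropout = nn.Dropout(dropout)   # single instance
            self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
            self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
            self.ff = FeedForward(d_model, dropout=dropout)

        def forward(self, x, e_outputs, src_mask, trg_mask):
            x2 = self.norm_1(x)
            x = x + self.dropout(self.attn_1(x2, x2, x2, trg_mask))
            x2 = self.norm_2(x)
            x = x + self.dropout(self.attn_2(x2, e_outputs, e_outputs, src_mask))
            x2 = self.norm_3(x)
            x = x + self.dropout(self.ff(x2))
            return x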