I had a question regarding weight sharing. The paper I'm currently reimplementing has an option to reuse the embedding layer as the classification layer. What I've done so far is use this if-else block inside my forward method (the code is a simplified version to get my point across):
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self,
                 share_weights: bool,
                 hidden_dim: int,
                 num_items: int) -> None:
        super().__init__()
        self.share_weights = share_weights
        self.embedding = nn.Embedding(num_items, hidden_dim)
        self.classifier = nn.Linear(hidden_dim, num_items)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_emb = self.embedding(x)
        if self.share_weights:
            # embedding.weight is (num_items, hidden_dim), so its transpose
            # maps hidden states back to item logits; note this path has no
            # bias term, unlike self.classifier
            outputs = x_emb @ self.embedding.weight.transpose(1, 0)
        else:
            outputs = self.classifier(x_emb)
        return outputs
What I was wondering is whether I could simply assign the weight of the embedding layer to the classifier layer:
class Model(nn.Module):
    def __init__(self,
                 share_weights: bool,
                 hidden_dim: int,
                 num_items: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(num_items, hidden_dim)
        self.classifier = nn.Linear(hidden_dim, num_items)
        if share_weights:
            # nn.Linear.weight is (out_features, in_features), i.e.
            # (num_items, hidden_dim), the same shape as embedding.weight,
            # so no transpose is needed. Assigning a transposed view would
            # also raise a TypeError: a Parameter attribute can only be
            # reassigned to another nn.Parameter, and .transpose() returns
            # a plain Tensor.
            self.classifier.weight = self.embedding.weight

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_emb = self.embedding(x)
        outputs = self.classifier(x_emb)
        return outputs
and receive the same results since I’m not performing a deep copy or anything?
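
For reference, here is a minimal sketch of how I'd sanity-check that the two variants agree, using the assignment version of Model from above. Zeroing the classifier bias is my own assumption, so that the bias-free matmul path and nn.Linear compute exactly the same thing:

import torch

torch.manual_seed(0)
model = Model(share_weights=True, hidden_dim=8, num_items=20)

# zero the bias so classifier(x_emb) reduces to x_emb @ embedding.weight.T
with torch.no_grad():
    model.classifier.bias.zero_()

x = torch.randint(0, 20, (4, 5))  # batch of item indices
x_emb = model.embedding(x)

manual = x_emb @ model.embedding.weight.transpose(1, 0)  # if-else variant
tied = model.classifier(x_emb)                           # assignment variant

print(torch.allclose(manual, tied))  # True: both read the same Parameter

Since the assignment shares the Parameter object rather than copying it, gradients from both usages would also accumulate into the same tensor during training.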