Partially freeze embedding layer

Here is an example of how this can be used in a Bi-LSTM (a short sketch of feeding the result into the LSTM follows at the end).

import numpy as np
import torch
import torch.nn as nn

embedding_size = 50
pretrain_word_embedding = ...  # a numpy matrix holding the pretrained embedding, shape [vocab_size, embedding_size]
vocab_size = pretrain_word_embedding.shape[0]
freeze_word_embs = nn.Embedding(vocab_size, embedding_size)
freeze_word_embs.weight.data.copy_(torch.from_numpy(pretrain_word_embedding))
freeze_word_embs.weight.requires_grad = False

# Trainable embeddings for the remaining words, randomly initialised.
random_embs = np.empty([vocab_size, embedding_size])
scale = np.sqrt(3.0 / embedding_size)
for index in range(vocab_size):
    random_embs[index, :] = np.random.uniform(-scale, scale, [1, embedding_size])
unfreeze_word_embs = nn.Embedding(vocab_size, embedding_size)
unfreeze_word_embs.weight.data.copy_(torch.from_numpy(random_embs))

word_inputs = ...  # word id tensor with shape [batch_size, sentence_max_length]
freeze_boundary = 10  # word ids below freeze_boundary use the frozen (pretrained) embedding
batch_size = word_inputs.size(0)
sent_len = word_inputs.size(1)
freeze_embs = freeze_word_embs( word_inputs )
unfreeze_embs = unfreeze_word_embs( word_inputs )
word_embs = []
for i, w_input in enumerate( word_inputs.data ):
    freeze_emb = freeze_embs[ i ]
    unfreeze_emb = unfreeze_embs[ i ]
    word_emb = []
    for j, word_id in enumerate( w_input ):
        if word_id < freeze_boundary:
            word_emb.append( freeze_emb[ j ] )
        else:
            word_emb.append( unfreeze_emb[j] )
    word_emb = torch.stack( word_emb )
    word_embs.append( word_emb )
word_embs = torch.stack( word_embs )
# word_embs is the final, partially frozen embedding tensor: [batch_size, sent_len, embedding_size].
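
The double loop above is easy to follow but slow. A loop-free variation (my own sketch, not part of the original example, assuming word_inputs, freeze_embs and unfreeze_embs from above) builds the same tensor with a boolean mask and torch.where:

# Vectorized sketch: pick, per token, between the frozen and trainable lookups.
mask = (word_inputs < freeze_boundary).unsqueeze(-1)        # [batch_size, sent_len, 1]
word_embs = torch.where(mask, freeze_embs, unfreeze_embs)   # [batch_size, sent_len, embedding_size]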
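
Since the post mentions a Bi-LSTM, here is a hedged usage sketch of feeding word_embs into one; the layer name and hidden size below are assumptions for illustration only:

# Hypothetical downstream usage: feed the mixed embeddings into a bidirectional LSTM.
bilstm = nn.LSTM(input_size=embedding_size, hidden_size=100,
                 batch_first=True, bidirectional=True)
lstm_out, _ = bilstm(word_embs)   # lstm_out: [batch_size, sent_len, 2 * 100]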