Convert Keras code to PyTorch

Hi there, I’m also trying to translate this code from Keras to PyTorch.
The original code is:

# user embedding
user_id = Input(shape=(1,), dtype='uint64')
user_embedding_layer = Embedding(user_count, MAX_SENTS, trainable=True)
user_embedding = user_embedding_layer(user_id)

user_embedding_word = Dense(200, activation='relu')(user_embedding)
user_embedding_word = Flatten()(user_embedding_word)

user_embedding_news = Dense(200, activation='relu')(user_embedding)
user_embedding_news = Flatten()(user_embedding_news)


# news embedding architecture
news_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedding_layer = Embedding(len(word_dict), embedding_dim, weights=[embedding_mat], trainable=True)
embedded_sequences = embedding_layer(news_input)
embedded_sequences = Dropout(0.2)(embedded_sequences)

cnnoutput = Conv1D(padding='same', activation='relu', strides=1, filters=embedding_dim, kernel_size=3)(embedded_sequences)
cnnoutput = Dropout(0.2)(cnnoutput)

attention_a = Dot((2, 1))([cnnoutput, Dense(embedding_dim, activation='tanh')(user_embedding_word)])
attention_weight = Activation('softmax')(attention_a)
news_rep = keras.layers.Dot((1, 1))([cnnoutput, attention_weight])
newsEncoder = Model([news_input, user_id], news_rep)


# clicked news embedding
all_news_input = [keras.Input((MAX_SENT_LENGTH,), dtype='int32') for _ in range(MAX_SENTS)]
browsed_news_rep = [newsEncoder([news, user_id]) for news in all_news_input]
browsed_news_rep = concatenate([Lambda(lambda x: K.expand_dims(x, axis=1))(news) for news in browsed_news_rep], axis=1)


# attention news
attention_news = keras.layers.Dot((2, 1))([browsed_news_rep, Dense(embedding_dim, activation='tanh')(user_embedding_news)])
attention_weight_news = Activation('softmax')(attention_news)
user_rep = keras.layers.Dot((1, 1))([browsed_news_rep, attention_weight_news])


# candidate news embedding
candidates = [keras.Input((MAX_SENT_LENGTH,), dtype='int32') for _ in range(1 + npratio)]
candidate_vecs = [newsEncoder([candidate, user_id]) for candidate in candidates]
logits = [keras.layers.dot([user_rep, candidate_vec], axes=-1) for candidate_vec in candidate_vecs]
logits = keras.layers.Activation(keras.activations.softmax)(keras.layers.concatenate(logits))


model = Model(candidates + all_news_input + [user_id], logits)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['acc'])


candidate_one = keras.Input((MAX_SENT_LENGTH,))
candidate_one_vec = newsEncoder([candidate_one, user_id])
score = keras.layers.Activation(keras.activations.sigmoid)(keras.layers.dot([user_rep, candidate_one_vec], axes=-1))
model_test = keras.Model([candidate_one] + all_news_input + [user_id], score)

I am now dividing it into 4 classes: UserEmbed, NewsEmbed, ClickPredictor and Main.
My attempt:

import torch.nn as nn

class UserEmbed(nn.Module):
    def __init__(self, user_count):
        super(UserEmbed, self).__init__()
        self.embed_word = nn.Sequential(
            nn.Embedding(user_count, MAX_SENTS),
            nn.Linear(MAX_SENTS, 200),
            nn.ReLU(),
            nn.Flatten())
        self.embed_news = nn.Sequential(
            nn.Embedding(user_count, MAX_SENTS),
            nn.Linear(MAX_SENTS, 200),
            nn.ReLU(),
            nn.Flatten())
    def forward(self, x):
        user_embedding_word = self.embed_word(x)
        user_embedding_news = self.embed_news(x)
        return (user_embedding_word, user_embedding_news)

Is the first class correct? I don’t know how to convert the remaining parts.

In your Keras code both Dense branches read from the same user embedding, so the Embedding layer should be shared rather than duplicated:

class UserEmbed(nn.Module):
    def __init__(self, user_count):
        super(UserEmbed, self).__init__()
        self.common = nn.Embedding(user_count, MAX_SENTS)
        self.fc_word = nn.Sequential(
            nn.Linear(MAX_SENTS, 200),
            nn.ReLU())
        self.fc_news = nn.Sequential(
            nn.Linear(MAX_SENTS, 200),
            nn.ReLU())
    def forward(self, x):
        emb = self.common(x)
        user_embedding_word = self.fc_word(emb)
        user_embedding_news = self.fc_news(emb)
        return (user_embedding_word, user_embedding_news)

Since your output is already [batch_size, num_neurons], there is no need to flatten it. In PyTorch this is usually done with view() (or reshape()); recent versions also ship an nn.Flatten module, so your nn.Flatten() calls work, they are just unnecessary here.
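For example, with a made-up [32, 1, 200] tensor:

import torch
import torch.nn as nn

x = torch.randn(32, 1, 200)
print(x.view(x.size(0), -1).shape)  # torch.Size([32, 200])
print(nn.Flatten()(x).shape)        # same result; nn.Flatten flattens dims 1..-1 by default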

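For the remaining parts, here is a rough, untested sketch of how the newsEncoder sub-model (your planned NewsEmbed class) could look in PyTorch. The class and attribute names are mine; it assumes embedding_mat is a numpy array of shape [len(word_dict), embedding_dim] and that user_embedding_word comes from UserEmbed above with shape [batch_size, 200]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class NewsEncoder(nn.Module):
    def __init__(self, embedding_dim, embedding_mat):
        super(NewsEncoder, self).__init__()
        # Embedding(len(word_dict), embedding_dim, weights=[embedding_mat], trainable=True)
        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(embedding_mat), freeze=False)
        self.dropout = nn.Dropout(0.2)
        # Conv1D(padding='same', strides=1, filters=embedding_dim, kernel_size=3)
        self.conv = nn.Conv1d(embedding_dim, embedding_dim, kernel_size=3, padding=1)
        # Dense(embedding_dim, activation='tanh') applied to user_embedding_word
        self.user_proj = nn.Linear(200, embedding_dim)

    def forward(self, news_input, user_embedding_word):
        # news_input: [batch, MAX_SENT_LENGTH] word ids (long)
        x = self.dropout(self.embedding(news_input))          # [batch, L, emb]
        x = x.transpose(1, 2)                                 # Conv1d expects [batch, channels, L]
        c = F.relu(self.conv(x))                              # [batch, emb, L]
        c = self.dropout(c).transpose(1, 2)                   # back to [batch, L, emb]
        q = torch.tanh(self.user_proj(user_embedding_word))   # [batch, emb]
        # Dot((2, 1)) + softmax -> word-level attention weights
        att = F.softmax(torch.bmm(c, q.unsqueeze(2)).squeeze(2), dim=1)  # [batch, L]
        # Dot((1, 1)) -> attention-weighted sum of the CNN outputs
        return torch.bmm(att.unsqueeze(1), c).squeeze(1)      # [batch, emb]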
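The news-level attention and the scoring step can then reuse NewsEncoder. Again only a sketch under the same assumptions: I stack the MAX_SENTS clicked news and the 1 + npratio candidates into single tensors instead of Keras’ lists of Inputs, and Main returns raw logits so that nn.CrossEntropyLoss (which combines log-softmax and categorical cross-entropy) can replace your final softmax layer:

class UserRep(nn.Module):
    # attention over the browsed-news vectors, queried by user_embedding_news
    def __init__(self, embedding_dim):
        super(UserRep, self).__init__()
        self.user_proj = nn.Linear(200, embedding_dim)  # Dense(embedding_dim, 'tanh')

    def forward(self, browsed_news_rep, user_embedding_news):
        # browsed_news_rep: [batch, MAX_SENTS, emb], user_embedding_news: [batch, 200]
        q = torch.tanh(self.user_proj(user_embedding_news))
        att = F.softmax(torch.bmm(browsed_news_rep, q.unsqueeze(2)).squeeze(2), dim=1)
        return torch.bmm(att.unsqueeze(1), browsed_news_rep).squeeze(1)  # [batch, emb]

class ClickPredictor(nn.Module):
    # dot product between the user vector and every candidate vector
    def forward(self, user_rep, candidate_vecs):
        # candidate_vecs: [batch, 1 + npratio, emb] -> logits [batch, 1 + npratio]
        return torch.bmm(candidate_vecs, user_rep.unsqueeze(2)).squeeze(2)

class Main(nn.Module):
    def __init__(self, user_count, embedding_dim, embedding_mat):
        super(Main, self).__init__()
        self.user_embed = UserEmbed(user_count)  # the shared-embedding version above
        self.news_encoder = NewsEncoder(embedding_dim, embedding_mat)
        self.user_rep = UserRep(embedding_dim)
        self.click_predictor = ClickPredictor()

    def forward(self, candidates, clicked_news, user_id):
        # candidates:   [batch, 1 + npratio, MAX_SENT_LENGTH]
        # clicked_news: [batch, MAX_SENTS, MAX_SENT_LENGTH]
        # user_id:      [batch] (long)
        u_word, u_news = self.user_embed(user_id)
        browsed = torch.stack([self.news_encoder(clicked_news[:, i], u_word)
                               for i in range(clicked_news.size(1))], dim=1)
        cands = torch.stack([self.news_encoder(candidates[:, i], u_word)
                             for i in range(candidates.size(1))], dim=1)
        return self.click_predictor(self.user_rep(browsed, u_news), cands)

Training would then be the usual loop with criterion = nn.CrossEntropyLoss() on the logits (target = index of the positive candidate) and torch.optim.Adam(model.parameters(), lr=0.001), matching your model.compile(...). For the model_test part, score a single candidate with torch.sigmoid of the dot product instead of the softmax.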