Hi there, I’m also trying to translate some code from Keras to PyTorch.
The original code is:
# user embedding
# A single user id per sample is looked up in one trainable embedding table
# (user_count rows, MAX_SENTS columns).
user_id = Input(shape=(1,), dtype='uint64')
user_embedding_layer= Embedding(user_count, MAX_SENTS, trainable=True)
user_embedding= user_embedding_layer(user_id)
# The SAME user_embedding tensor feeds two separate Dense(200, relu) branches:
# user_embedding_word is consumed by the word-level attention in the news
# encoder below, user_embedding_news by the news-level attention further down.
user_embedding_word= Dense(200,activation='relu')(user_embedding)
user_embedding_word= Flatten()(user_embedding_word)
user_embedding_news= Dense(200,activation='relu')(user_embedding)
user_embedding_news= Flatten()(user_embedding_news)
# news embedding architecture
# Word ids -> embedding initialised from embedding_mat (kept trainable)
# -> dropout -> Conv1D (kernel 3, 'same' padding, embedding_dim filters, relu)
# -> dropout.
news_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedding_layer = Embedding(len(word_dict) , embedding_dim, weights=[embedding_mat],trainable=True)
embedded_sequences = embedding_layer(news_input)
embedded_sequences =Dropout(0.2)(embedded_sequences)
cnnouput = Conv1D(padding='same', activation='relu', strides=1, filters=embedding_dim, kernel_size=3)(embedded_sequences)
cnnouput=Dropout(0.2)(cnnouput)
# User-conditioned attention over the CNN output: the query is a tanh Dense
# projection of user_embedding_word; Dot((2, 1)) contracts axis 2 of cnnouput
# with axis 1 of the query, softmax normalises the scores, and Dot((1, 1))
# contracts axis 1 of cnnouput with axis 1 of the weights.
# presumably this yields one embedding_dim vector per news item — TODO confirm shapes
attention_a = Dot((2, 1))([cnnouput, Dense(embedding_dim,activation='tanh')(user_embedding_word)])
attention_weight = Activation('softmax')(attention_a)
news_rep=keras.layers.Dot((1, 1))([cnnouput, attention_weight])
# Wrap the encoder as a reusable Model so every news input below goes through
# the same newsEncoder instance.
newsEncoder = Model([news_input,user_id], news_rep)
# clicked news embedding
# MAX_SENTS separate inputs, one per browsed news item; each is encoded with
# newsEncoder, given an extra axis at position 1, and concatenated along it.
all_news_input = [keras.Input((MAX_SENT_LENGTH,), dtype='int32') for _ in range(MAX_SENTS)]
browsed_news_rep = [newsEncoder([news,user_id]) for news in all_news_input]
browsed_news_rep =concatenate([Lambda(lambda x: K.expand_dims(x,axis=1))(news) for news in browsed_news_rep],axis=1)
# attention news
# Same attention pattern as in the news encoder, applied to browsed_news_rep
# with a query derived from user_embedding_news.
attention_news = keras.layers.Dot((2, 1))([browsed_news_rep, Dense(embedding_dim,activation='tanh')(user_embedding_news)])
attention_weight_news = Activation('softmax')(attention_news)
user_rep=keras.layers.Dot((1, 1))([browsed_news_rep, attention_weight_news])
# candidate news embedding
# 1 + npratio candidate inputs, each encoded by newsEncoder and scored by a
# dot product with user_rep; the scores are concatenated and passed through
# softmax.
candidates = [keras.Input((MAX_SENT_LENGTH,), dtype='int32') for _ in range(1+npratio)]
candidate_vecs = [ newsEncoder([candidate,user_id]) for candidate in candidates]
logits = [keras.layers.dot([user_rep, candidate_vec], axes=-1) for candidate_vec in candidate_vecs]
logits = keras.layers.Activation(keras.activations.softmax)(keras.layers.concatenate(logits))
# Training model; input order is candidates, then browsed news, then user id.
model = Model(candidates+all_news_input+[user_id], logits)
# NOTE(review): `lr` may be a deprecated spelling of `learning_rate` depending
# on the installed Keras version — confirm.
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['acc'])
# Scoring model: a single candidate is encoded by newsEncoder and its dot
# product with user_rep goes through a sigmoid.
# NOTE(review): unlike the other news inputs, no dtype='int32' is passed here.
candidate_one = keras.Input((MAX_SENT_LENGTH,))
candidate_one_vec = newsEncoder([candidate_one,user_id])
score = keras.layers.Activation(keras.activations.sigmoid)(keras.layers.dot([user_rep, candidate_one_vec], axes=-1))
model_test = keras.Model([candidate_one]+all_news_input+[user_id], score)
I am now dividing it into 4 classes: `UserEmbed`, `NewsEmbed`, `ClickPredictor` and `Main`.
My work:
class UserEmbed(nn.Module):
    """Map user ids to the two user query vectors used by the attention layers.

    A single embedding table is shared by both branches, mirroring the Keras
    reference in which one ``Embedding`` output feeds two separate
    ``Dense(200, relu)`` + ``Flatten`` heads. (Using two independent
    ``nn.Embedding`` tables would double the user parameters and let the two
    branches learn unrelated user vectors.)

    Args:
        user_count: Number of rows in the user embedding table.
        max_sents: Width of the user embedding. ``None`` (the default) falls
            back to the module-level ``MAX_SENTS`` constant.
        hidden_dim: Output width of each projection head (200 by default).
    """

    def __init__(self, user_count, max_sents=None, hidden_dim=200):
        super().__init__()
        if max_sents is None:
            # Resolved lazily so the class can be defined before MAX_SENTS is.
            max_sents = MAX_SENTS

        # One table shared by both heads.
        self.embedding = nn.Embedding(user_count, max_sents)
        self.word_proj = nn.Sequential(
            nn.Linear(max_sents, hidden_dim),
            nn.ReLU(),
            nn.Flatten(),
        )
        self.news_proj = nn.Sequential(
            nn.Linear(max_sents, hidden_dim),
            nn.ReLU(),
            nn.Flatten(),
        )

    def forward(self, x):
        """Return ``(user_embedding_word, user_embedding_news)`` for ids ``x``.

        ``x`` must be an integer tensor accepted by ``nn.Embedding``.
        Assumes ``x`` is ``(batch, 1)`` like the Keras ``Input(shape=(1,))``,
        in which case ``nn.Flatten`` yields ``(batch, hidden_dim)`` outputs —
        TODO confirm against the caller.
        """
        shared = self.embedding(x)
        return (self.word_proj(shared), self.news_proj(shared))
Is the first class correct? I don’t know how to convert the rest.