I am trying to build a simple two-tower recommender system on the MovieLens 100k dataset. The user tower is just a simple embedding layer. The item tower uses an embedding layer and concatenates its output with a binary vector corresponding to genres. The concatenated features are then passed through a feed-forward network. Given a user_id, item_id, and item_features, I am trying to predict the user-item rating.
Q1: One thing I am unsure about is whether the embedding dimension is too large since I have more parameters in my model than I have user-item-rating pairs.
Model Parameters: 99440
Train Pairs size: 80668
Total Users x Total Items: 5942620
Q2: The other issue is: if I look at the cosine similarity scores between the item embeddings (the output of item_tower(item_id, item_feature)), I see that all of them are close to 1, even for the items that are supposed to be least similar. I am not sure what is causing this behavior.
TOP 4
title scores
Aladdin (1992) 1.000000
Hercules (1997) 0.999938
Muppet Treasure Island (1996) 0.999860
Little Mermaid, The (1989) 0.999844
BOT 3
Underworld: Awakening (2012) 0.988776
The Witch (2015) [Horror] 0.988495
Resident Evil: Retribution (2012) 0.987383
The code is below:
class TwoTower(torch.nn.Module):
    """Two-tower rating model scoring a (user, item) pair by a dot product.

    Args:
        user_input: Number of rows in the user embedding table.
        item_emb_input: Number of rows in the item-id embedding table.
        item_emb_output: Width of the item-id embedding.
        item_feat_input: Width of the per-item feature vector.
        embedding_dim: Output width shared by both towers.
    """

    def __init__(self, user_input, item_emb_input, item_emb_output,
                 item_feat_input, embedding_dim):
        super().__init__()
        # The user tower is built first so parameters are created in the
        # same order as before (matters for seeded initialisation).
        self.user_tower = UserTower(user_input, embedding_dim)
        self.item_tower = ItemTower(
            item_emb_input,
            item_emb_output,
            item_feat_input,
            embedding_dim,
        )

    def forward(self, usr, itm, itm_feat):
        """Return one score per row: the dot product of the two tower outputs."""
        user_vec = self.user_tower(usr)
        item_vec = self.item_tower(itm, itm_feat)
        # Element-wise product reduced over dim 1 == row-wise dot product.
        return (user_vec * item_vec).sum(dim=1)
class ItemTower(torch.nn.Module):
    """Item encoder: id embedding joined with item features, then a 2-layer MLP.

    Args:
        emb_in: Number of rows in the item-id embedding table.
        emb_out: Width of the item-id embedding.
        feat_in: Width of the extra feature vector supplied per item.
        embedding_dim: Hidden and output width of the feed-forward network.
    """

    def __init__(self, emb_in, emb_out, feat_in, embedding_dim):
        super().__init__()
        self.item_embedding = torch.nn.Embedding(emb_in, emb_out)

        # The MLP consumes the id embedding and the raw features side by side.
        joined_width = emb_out + feat_in
        layers = [
            torch.nn.Linear(joined_width, embedding_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(embedding_dim, embedding_dim),
        ]
        self.ff = torch.nn.Sequential(*layers)

    def forward(self, item_id, item_feat):
        """Embed ``item_id``, append ``item_feat`` along dim 1, apply the MLP."""
        id_vec = self.item_embedding(item_id)
        joined = torch.cat([id_vec, item_feat], dim=1)
        return self.ff(joined)
class UserTower(torch.nn.Module):
    """User encoder: a plain embedding lookup.

    Args:
        input_dim: Number of rows in the user embedding table.
        embedding_dim: Width of each user vector.
    """

    def __init__(self, input_dim, embedding_dim):
        super().__init__()
        self.user_embedding = torch.nn.Embedding(
            num_embeddings=input_dim,
            embedding_dim=embedding_dim,
        )

    def forward(self, x):
        """Return the embedding rows selected by the index tensor ``x``."""
        user_vec = self.user_embedding(x)
        return user_vec
The initialization
# Build the model, optimizer and loss used by the training loop.
model = TwoTower(
    user_input=len(user_id_map),
    item_emb_input=len(ds_movies),
    item_emb_output=8,
    # Feature columns are everything from the fifth column of ds_movies
    # onwards (presumably the binary genre flags -- confirm against the
    # dataframe layout).
    item_feat_input=len(ds_movies.columns[4:]),
    embedding_dim=32,
)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()
Train loop
# Training script: SGD on MSE plus an L2 penalty on both raw embedding tables,
# with a test-set evaluation after every epoch.
loss_trn = []  # training MSE (without the penalty), sampled every 100th batch
loss_tst = []  # test MSE, one entry per epoch
num_epoch = 15
lam = 1e-1  # weight of the L2 penalty on the embedding tables

for i in range(num_epoch):
    # Explicit mode switch: a no-op for Embedding/Linear/ReLU layers, but it
    # keeps the loop correct if dropout or batch-norm is added to a tower.
    model.train()
    for batch, (usr, itm, itm_feat, rat) in enumerate(ratings_dataloader_trn):
        pred = model(usr, itm, itm_feat)
        mse = criterion(pred, rat)

        # Mean squared row norm of each full embedding table (not only the
        # rows that appear in this batch).
        user_l2 = model.user_tower.user_embedding.weight.pow(2).sum(dim=1).mean()
        item_l2 = model.item_tower.item_embedding.weight.pow(2).sum(dim=1).mean()
        loss = mse + (lam * user_l2 + lam * item_l2)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            # Reuse the MSE already computed for this batch instead of
            # running the criterion a second time.
            loss_trn.append(mse.item())

    model.eval()
    with torch.no_grad():
        sq_err_sum = 0.0
        n_ratings = 0
        for usr, itm, itm_feat, rat in ratings_dataloader_tst:
            pred = model(usr, itm, itm_feat)
            # The criterion returns a per-batch mean; weight it by the batch
            # size so a short final batch does not skew the epoch average.
            n_batch = rat.numel()
            sq_err_sum += criterion(pred, rat).item() * n_batch
            n_ratings += n_batch
    # An empty test loader yields NaN rather than a ZeroDivisionError.
    test_loss = sq_err_sum / n_ratings if n_ratings else float("nan")
    loss_tst.append(test_loss)