Hi,
I am trying to build a video retrieval system using cosine similarity. L2 distance could also be used as it could be written as || a - b || = 2 - 2 * <a, b>
, where a
, b
are both normalized vectors.
Now I have two matrice A: [N x d]
, B: [M x d]
L2 distance can be calculated in PyTorch as torch.pdist(A, B)
, cosine similarity as inner product torch.mm(A, B.transpose(0, 1))
. However, I found later to be much slower than the former. Any idea why?
Below is the code I used to do the comparison.
import time
import torch
import torch.nn.functional as F
import numpy as np
def compare_l2dist_inner_product_time(n_videos=2000, d=256, n_query=1000, n_runs=5):
st_time = time.time()
fake_database = F.normalize(torch.randn((n_videos, d), dtype=torch.float32).cuda(), dim=1, p=2)
fake_query = F.normalize(torch.randn((n_query, d), dtype=torch.float32).cuda(), dim=1, p=2)
print("Construct fake database + query time {}".format(time.time() - st_time))
print("fake_database shape {} fake_query shape {}".format(fake_database.shape, fake_query.shape))
times_l2dist = []
for _ in range(n_runs):
st_time = time.time()
l2_dist = torch.cdist(fake_query, fake_database, p=2) # (n_query, n_videos)
times_l2dist.append(time.time() - st_time)
avg_time_l2dist = np.mean(times_l2dist)
print("L2 Distance time {}".format(avg_time_l2dist))
times_ip = []
fake_database = fake_database.transpose(0, 1)
for _ in range(n_runs):
st_time = time.time()
inner_product = torch.mm(fake_query, fake_database) # (n_query, n_videos)
times_ip.append(time.time() - st_time)
avg_time_ip = np.mean(times_ip)
print("Inner Product time {}".format(avg_time_ip))
compare_l2dist_inner_product_time()
Output:
Construct fake database + query time 7.20833158493042
fake_database shape torch.Size([2000, 256]) fake_query shape torch.Size([1000, 256])
L2 Distance time 5.9604644775390625e-05
Inner Product time 0.07725939750671387