If anyone is here looking for fast ways to select samples, I created a small comparison to time some of the popular random indexing solutions from the forums. For the comparison, I wrote small functions with the goal of generating indices to select 10% of a population. For a fair comparison, the indices are returned as GPU-tensors.
It looks like, if your population size is less than the maximum int32 value (2^31 − 1), generating a random permutation on the GPU may be the fastest solution. Your mileage may vary, so I’ve included my entire plotting script below so you can test it yourself.
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
def rand(pop_size, num_samples, device='cuda'):
    """Use random.sample to generate indices.

    Args:
        pop_size: Size of the population; indices are drawn from
            ``range(pop_size)``.
        num_samples: Number of distinct indices to draw.
        device: Device the resulting tensor is moved to. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of ``num_samples`` distinct indices,
        located on ``device``.
    """
    indices = random.sample(range(pop_size), num_samples)
    # torch.Tensor(list) would create a float32 tensor: floats cannot be used
    # to index another tensor and are only exact for integers up to 2**24.
    # Build an integer tensor explicitly instead.
    return torch.tensor(indices, dtype=torch.long).to(device)
def mult_cpu(pop_size, num_samples, device='cuda'):
    """Use torch.Tensor.multinomial to generate indices on a CPU tensor.

    A uniform probability vector is built on the CPU, sampled without
    replacement, and the sampled indices are then moved to ``device``.

    Args:
        pop_size: Size of the population (number of categories).
        num_samples: Number of distinct indices to draw.
        device: Device the resulting tensor is moved to. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of ``num_samples`` distinct indices in
        ``[0, pop_size)``, located on ``device``.

    Note:
        ``torch.multinomial`` limits the number of categories to 2**24.
    """
    uniform_probs = torch.ones(pop_size) / pop_size
    sampled = uniform_probs.multinomial(num_samples=num_samples,
                                        replacement=False)
    return sampled.to(device)
def mult_gpu(pop_size, num_samples, device='cuda'):
    """Use torch.Tensor.multinomial to generate indices on a GPU tensor.

    The uniform probability vector is created directly on ``device`` and
    sampled there without replacement, so no host-to-device copy is needed.

    Args:
        pop_size: Size of the population (number of categories).
        num_samples: Number of distinct indices to draw.
        device: Device on which sampling takes place. Defaults to ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of ``num_samples`` distinct indices in
        ``[0, pop_size)``, located on ``device``.

    Note:
        ``torch.multinomial`` limits the number of categories to 2**24.
    """
    uniform_probs = torch.ones(pop_size, device=device) / pop_size
    return uniform_probs.multinomial(num_samples=num_samples,
                                     replacement=False)
def perm_cpu(pop_size, num_samples, device='cuda'):
    """Use torch.randperm to generate indices on a CPU tensor.

    A full permutation of ``range(pop_size)`` is generated on the CPU, its
    first ``num_samples`` entries are kept, and that slice is moved to
    ``device``.

    Args:
        pop_size: Size of the population to permute.
        num_samples: Number of leading permutation entries to keep.
        device: Device the resulting tensor is moved to. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of at most ``num_samples`` distinct
        indices in ``[0, pop_size)``, located on ``device``.
    """
    permutation = torch.randperm(pop_size)
    return permutation[:num_samples].to(device)
def perm_gpu(pop_size, num_samples, device='cuda'):
    """Use torch.randperm to generate indices on a GPU tensor.

    The full permutation of ``range(pop_size)`` is generated directly on
    ``device`` and its first ``num_samples`` entries are returned.

    Args:
        pop_size: Size of the population to permute.
        num_samples: Number of leading permutation entries to keep.
        device: Device on which the permutation is generated. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of at most ``num_samples`` distinct
        indices in ``[0, pop_size)``, located on ``device``.
    """
    permutation = torch.randperm(pop_size, device=device)
    return permutation[:num_samples]
def perm_gpu_f32(pop_size, num_samples, device='cuda'):
    """Use torch.randperm to generate 32-bit integer indices on a GPU tensor.

    Despite the ``_f32`` suffix in the name, the permutation is generated
    with ``dtype=torch.int32`` (32-bit integers, not 32-bit floats).

    Args:
        pop_size: Size of the population to permute.
        num_samples: Number of leading permutation entries to keep.
        device: Device on which the permutation is generated. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.int32`` tensor of at most ``num_samples`` distinct
        indices in ``[0, pop_size)``, located on ``device``.
    """
    permutation = torch.randperm(pop_size, dtype=torch.int32, device=device)
    return permutation[:num_samples]
def sort_rand_cpu(pop_size, num_samples, device='cuda'):
    """Generate a random torch.Tensor (CPU) and sort it to generate indices.

    ``pop_size`` uniform random values are drawn on the CPU; the argsort of
    those values is a random ordering of ``range(pop_size)``, of which the
    first ``num_samples`` entries are kept and moved to ``device``.

    Args:
        pop_size: Size of the population.
        num_samples: Number of leading argsort entries to keep.
        device: Device the resulting tensor is moved to. Defaults to
            ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of at most ``num_samples`` indices in
        ``[0, pop_size)``, located on ``device``.
    """
    random_keys = torch.rand(pop_size)
    order = torch.argsort(random_keys)
    # Move the result to the target device like the other *_cpu variants do;
    # otherwise this function returns a CPU tensor and its timing leaves out
    # the host-to-device copy the others pay for.
    return order[:num_samples].to(device)
def sort_rand_gpu(pop_size, num_samples, device='cuda'):
    """Generate a random torch.Tensor (GPU) and sort it to generate indices.

    ``pop_size`` uniform random values are drawn directly on ``device``; the
    argsort of those values is a random ordering of ``range(pop_size)``, of
    which the first ``num_samples`` entries are returned.

    Args:
        pop_size: Size of the population.
        num_samples: Number of leading argsort entries to keep.
        device: Device on which the random values are generated and sorted.
            Defaults to ``'cuda'``.

    Returns:
        A 1-D ``torch.long`` tensor of at most ``num_samples`` indices in
        ``[0, pop_size)``, located on ``device``.
    """
    random_keys = torch.rand(pop_size, device=device)
    return torch.argsort(random_keys)[:num_samples]
# Index-generating functions to benchmark, in the order they are timed.
idx_fns = [
    rand,
    mult_cpu, mult_gpu,
    perm_cpu, perm_gpu, perm_gpu_f32,
    sort_rand_cpu, sort_rand_gpu,
]
# Population sizes to benchmark: 50 log-spaced integers from 1e3 to 1e7.
pop_size = np.logspace(3, 7, 50, dtype=int)


def _sync():
    """Wait for all queued CUDA work to finish (no-op when CUDA is absent)."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()


# Warm-up pass: the first CUDA call pays for one-time initialization, which
# would otherwise be charged to whichever function happens to run first.
for fn in idx_fns:
    fn(1000, 100)
_sync()

# One record per (population size, function) pair; consumed by the plot below.
d = []
for n_p in pop_size.tolist():
    print(f"Testing functions with {n_p} points.")
    # Select 10% of the population; computed outside the timed region.
    num_samples = int(0.1 * n_p)
    for fn in idx_fns:
        # CUDA kernels are launched asynchronously, so synchronize before
        # starting and before stopping the clock; otherwise only the launch
        # overhead of the GPU variants would be measured.
        _sync()
        tic = time.perf_counter()
        samples = fn(n_p, num_samples)
        _sync()
        toc = time.perf_counter()
        assert isinstance(samples, torch.Tensor)
        d.append({
            'Population': n_p,
            'Samples': num_samples,
            'Function': fn.__name__,
            'Time': toc - tic,
        })
# Collect the timing records into a table with the columns
# 'Population', 'Samples', 'Function' and 'Time'.
df = pd.DataFrame(d)

fig, ax = plt.subplots()
# One curve per benchmarked function; sort=False keeps the order in which
# the functions were timed, so the legend order matches the benchmark order.
for fn_name, timings in df.groupby('Function', sort=False):
    ax.plot(timings['Population'], timings['Time'], label=fn_name)
ax.set_title("Runtime of Index-Generating Methods for Randomly Selecting 10% of Population")
ax.set_xlabel('Population Size')
ax.set_ylabel('Time')
ax.set_yscale('log')
# Anchor the legend at the right edge of the axes.
ax.legend(bbox_to_anchor=(1, 0.7))

# Create the output directory first: savefig raises FileNotFoundError when
# 'figs/' does not exist, which would discard the plot after the whole
# benchmark has already run.
os.makedirs('figs', exist_ok=True)
fig.savefig('figs/randperm.png', bbox_inches='tight')
plt.show()