qnl
1
Hi
I use torch.sparse for categorical data, but the class below is too slow.
%%time
import torch

class SparseArray:
    def __init__(self):
        self.clear()

    def clear(self):
        # Start over from an empty 1 x 0 sparse tensor.
        self.array = torch.sparse.FloatTensor(1, 0)

    def addFloat(self, v):
        tmp = torch.FloatTensor([[v]]).to_sparse()
        self.array = torch.cat([self.array, tmp], dim=1)

    def addCategorical(self, idx, categorySize):
        # assert (idx < categorySize)
        # assert (0 <= idx)
        i = torch.LongTensor([[0], [idx]])
        v = torch.FloatTensor([1])
        tmp = torch.sparse.FloatTensor(i, v, torch.Size([1, categorySize]))
        self.array = torch.cat([self.array, tmp], dim=1)

sa = SparseArray()
for i in range(1024 * 16):
    sa.clear()
    sa.addFloat(5.0)
    sa.addCategorical(i % 100, 100)
This snippet outputs “Wall time: 2.54 s”.
Why is it so slow?
Thanks.
richard
(Richard Zou)
2
It would be good to have a baseline for comparison. How fast (or slow) is doing this with dense tensors?
qnl
3
You’re right. Here is the DenseArray code; it takes only 25 ms.
%%time
import torch
import numpy as np

class DenseArray:
    def __init__(self):
        self.array = np.zeros(1024)
        self.idx = 0

    def clear(self):
        self.array.fill(0.0)
        self.idx = 0

    def addFloat(self, v):
        self.array[self.idx] = v
        self.idx += 1

    def addCategorical(self, idx, categorySize):
        # assert (idx < categorySize)
        # assert (0 <= idx)
        self.array[self.idx + idx] = 1.0
        self.idx += categorySize

da = DenseArray()
for i in range(1024 * 16):
    da.clear()
    da.addFloat(5.0)
    da.addCategorical(i % 100, 100)
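Outside a notebook, where the %%time cell magic isn't available, the two versions can be compared with the standard timeit module. A rough sketch, assuming the SparseArray class from the first post and the DenseArray class above are both defined:

import timeit

def run_sparse():
    sa = SparseArray()          # the slow torch.cat-based class from the first post
    for i in range(1024 * 16):
        sa.clear()
        sa.addFloat(5.0)
        sa.addCategorical(i % 100, 100)

def run_dense():
    da = DenseArray()           # the NumPy-based class above
    for i in range(1024 * 16):
        da.clear()
        da.addFloat(5.0)
        da.addCategorical(i % 100, 100)

# One pass each; absolute numbers will vary by machine.
print("sparse:", timeit.timeit(run_sparse, number=1))
print("dense: ", timeit.timeit(run_dense, number=1))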
qnl
4
Finally, I found a solution. It's about 100× faster than the first code (25 ms ± 649 µs vs. 2.54 s).
%%timeit
import torch

class SparseArray:
    def __init__(self):
        # Accumulate indices and values in plain Python lists and build
        # the sparse tensor once at the end, instead of concatenating per element.
        self.values = []
        self.indexs = []
        self.row = -1
        self.col = 0
        self.maxCol = 0

    def clear(self):
        self.row += 1
        self.maxCol = max(self.col, self.maxCol)
        self.col = 0

    def addFloat(self, v):
        self.indexs.append([self.row, self.col])
        self.values.append(v)
        self.col += 1

    def addCategorical(self, idx, categorySize):
        assert (idx < categorySize)
        assert (0 <= idx)
        self.indexs.append([self.row, self.col + idx])
        self.values.append(1.0)
        self.col += categorySize

    def makeTensor(self):
        i = torch.LongTensor(self.indexs)
        v = torch.FloatTensor(self.values)
        return torch.sparse.FloatTensor(i.t(), v, torch.Size([self.row + 1, self.maxCol]))

sa = SparseArray()
for i in range(1024 * 16):
    sa.clear()
    sa.addFloat(5.0)
    sa.addCategorical(i % 100, 100)
sa.makeTensor()
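Batching the construction like this avoids calling torch.cat once per element, where each call builds a new sparse tensor from everything accumulated so far. In recent PyTorch versions the same batched construction can also be written with torch.sparse_coo_tensor; a minimal, illustrative sketch (the flat loop below stands in for the class, and all names are just for the example):

import torch

num_rows = 1024 * 16
category_size = 100

# Collect (row, col) indices and values in plain Python lists.
indices, values = [], []
for row in range(num_rows):
    indices.append([row, 0])                        # the float feature at column 0
    values.append(5.0)
    indices.append([row, 1 + row % category_size])  # the one-hot categorical feature
    values.append(1.0)

# Build one sparse COO tensor for the whole batch.
i = torch.tensor(indices, dtype=torch.long).t()
v = torch.tensor(values, dtype=torch.float)
x = torch.sparse_coo_tensor(i, v, size=(num_rows, 1 + category_size))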