qnl
1
Hi
I use torch.sparse for categorical data, but the class below is too slow.
%%time
import torch

class SparseArray:
    def __init__(self):
        self.clear()

    def clear(self):
        # Start over from an empty 1 x 0 sparse tensor.
        self.array = torch.sparse.FloatTensor(1, 0)

    def addFloat(self, v):
        tmp = torch.FloatTensor([[v]]).to_sparse()
        self.array = torch.cat([self.array, tmp], dim=1)

    def addCategorical(self, idx, categorySize):
        # assert (idx < categorySize)
        # assert (0 <= idx)
        i = torch.LongTensor([[0], [idx]])
        v = torch.FloatTensor([1])
        tmp = torch.sparse.FloatTensor(i, v, torch.Size([1, categorySize]))
        self.array = torch.cat([self.array, tmp], dim=1)

sa = SparseArray()
for i in range(1024 * 16):
    sa.clear()
    sa.addFloat(5.0)
    sa.addCategorical(i % 100, 100)
This snippet outputs “Wall time: 2.54 s”.
Why is it so slow?
Thanks.
richard
(Richard Zou)
2
It would be good to have a baseline for comparison. How fast (or slow) is doing this with dense tensors?
qnl
3
You’re right. Here is the DenseArray code; it takes only 25 ms.
%%time
import torch
import numpy as np

class DenseArray:
    def __init__(self):
        self.array = np.zeros(1024)
        self.idx = 0

    def clear(self):
        self.array.fill(0.0)
        self.idx = 0

    def addFloat(self, v):
        self.array[self.idx] = v
        self.idx += 1

    def addCategorical(self, idx, categorySize):
        # assert (idx < categorySize)
        # assert (0 <= idx)
        self.array[self.idx + idx] = 1.0
        self.idx += categorySize

da = DenseArray()
for i in range(1024 * 16):
    da.clear()
    da.addFloat(5.0)
    da.addCategorical(i % 100, 100)
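Outside a notebook, where the %%time cell magic isn't available, the two versions can be compared with the standard timeit module. A rough sketch, assuming the SparseArray class from the first post and the DenseArray class above are both defined:

import timeit

def run_sparse():
    sa = SparseArray()          # the slow torch.cat-based class from the first post
    for i in range(1024 * 16):
        sa.clear()
        sa.addFloat(5.0)
        sa.addCategorical(i % 100, 100)

def run_dense():
    da = DenseArray()           # the NumPy-based class above
    for i in range(1024 * 16):
        da.clear()
        da.addFloat(5.0)
        da.addCategorical(i % 100, 100)

# One pass each; absolute numbers will vary by machine.
print("sparse:", timeit.timeit(run_sparse, number=1))
print("dense: ", timeit.timeit(run_dense, number=1))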
qnl
4
Finally, I found a solution. It's about 100× faster than the first code (25 ms ± 649 µs vs. 2.54 s).
%%timeit
import torch

class SparseArray:
    def __init__(self):
        # Accumulate indices and values in plain Python lists and build
        # the sparse tensor once at the end, instead of concatenating per element.
        self.values = []
        self.indexs = []
        self.row = -1
        self.col = 0
        self.maxCol = 0

    def clear(self):
        self.row += 1
        self.maxCol = max(self.col, self.maxCol)
        self.col = 0

    def addFloat(self, v):
        self.indexs.append([self.row, self.col])
        self.values.append(v)
        self.col += 1

    def addCategorical(self, idx, categorySize):
        assert (idx < categorySize)
        assert (0 <= idx)
        self.indexs.append([self.row, self.col + idx])
        self.values.append(1.0)
        self.col += categorySize

    def makeTensor(self):
        i = torch.LongTensor(self.indexs)
        v = torch.FloatTensor(self.values)
        return torch.sparse.FloatTensor(i.t(), v, torch.Size([self.row + 1, self.maxCol]))

sa = SparseArray()
for i in range(1024 * 16):
    sa.clear()
    sa.addFloat(5.0)
    sa.addCategorical(i % 100, 100)
sa.makeTensor()
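Batching the construction like this avoids calling torch.cat once per element, where each call builds a new sparse tensor from everything accumulated so far. In recent PyTorch versions the same batched construction can also be written with torch.sparse_coo_tensor; a minimal, illustrative sketch (the flat loop below stands in for the class, and all names are just for the example):

import torch

num_rows = 1024 * 16
category_size = 100

# Collect (row, col) indices and values in plain Python lists.
indices, values = [], []
for row in range(num_rows):
    indices.append([row, 0])                        # the float feature at column 0
    values.append(5.0)
    indices.append([row, 1 + row % category_size])  # the one-hot categorical feature
    values.append(1.0)

# Build one sparse COO tensor for the whole batch.
i = torch.tensor(indices, dtype=torch.long).t()
v = torch.tensor(values, dtype=torch.float)
x = torch.sparse_coo_tensor(i, v, size=(num_rows, 1 + category_size))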