Hi, I am looking for help with the following code:
SRAM_rows = 256
step_size = 1
prob_table = torch.eye(SRAM_rows2+1).cuda()
levels = torch.tensor([i-SRAM_rows for i in range(SRAM_rows2+1)]).cuda().float()
cmprob = 0
Expected_outputs = torch.tensor([0]*257).cuda().float()def quant_XNORSRAM(x, prob_table, levels, step_size, lower_bound):
x_ind = (x.type(torch.int64) - lower_bound) / step_size
num_levels = len(levels)
x_cdf = prob_table[x_ind, 0:num_levels-1].cumsum(dim=-1)
x_rand = torch.rand(x.shape, device=‘cuda:0’)
x_rand = torch.stack([x_rand] * (num_levels-1), dim=-1)
x_comp = (x_rand > x_cdf).type(torch.int64).sum(dim=-1)
#import pdb; pdb.set_trace()#here if cmprob is set, we add the ideal value + the noise if(cmprob): y = Expected_outputs[x_ind] + levels[x_comp] else: y = levels[x_comp] return y
Just by calling the quant_XNORSRAM function, my compute time is increased 20 times, can you please help me identify the issue causing this and how to fix it?
Thanks