Dear all,
When I tried running my code, cuda throws device-asserted error as below
05/20 20:42:13 Epoch: 1, LR: [0.1]
F0 size: torch.Size([4219, 32])
F1 size: torch.Size([4488, 32])
feature_generation Done
/home/hashswan/Desktop/FCGF/lib/trainer.py:1189: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
batch_corr = torch.tensor(input_dict['matching_inds'][0]).to(self.device)
normed_coord0 cuda: True
sk0_sk1_corr shape torch.Size([192, 2])
tensor(4456, device='cuda:0')
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [1,0,0], thread: [0,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [64,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [65,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [0,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [1,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [2,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [112,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [113,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [114,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [39,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [40,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [41,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [42,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [43,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [44,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [0,0,0], thread: [63,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
Traceback (most recent call last):
File "train.py", line 93, in <module>
main(config)
File "train.py", line 71, in main
trainer.train()
File "/home/hashswan/Desktop/FCGF/lib/trainer.py", line 165, in train
self._train_epoch(epoch)
File "/home/hashswan/Desktop/FCGF/lib/trainer.py", line 1272, in _train_epoch
pos1 = normed_coord1[pos1_idx]
RuntimeError: CUDA error: device-side assert triggered
Original segment of codes are as below.
coord0_i = input_dict['coords0'].to(self.device)
coord1_i = input_dict['coords1'].to(self.device)
out0 = DetectNet(coord0_i).squeeze(0)
out1 = DetectNet(coord1_i).squeeze(0)
#print("batch_size:",self.batch_size)
#print("out0 shape:",out0.shape)
#print("out1 shape:",out1.shape)
kpt0_i = out0
kpt1_i = out1
batch_corr = torch.tensor(input_dict['matching_inds'][0]).to(self.device)
coord0_i = sinput0.coordinates_at(0).float().to(self.device)
coord1_i = sinput1.coordinates_at(0).float().to(self.device)
N0 = coord0_i.shape[0]
N1 = coord1_i.shape[0]
#shape = np.array([N0,N1])
centroid0,centroid1 = input_dict['centroid'][0][0].float().to(self.device),input_dict['centroid'][0][1].float().to(self.device)
max0,max1 = input_dict['max'][0][0].float().to(self.device),input_dict['max'][0][1].float().to(self.device)
normed_coord0 = (coord0_i - centroid0)/max0
normed_coord1 = (coord0_i - centroid1)/max1
print("normed_coord0 cuda:",normed_coord0.is_cuda)
#dist0 = pdist(kpt0_i,normed_coord0)
dist0 = torch.sqrt(torch.sum((kpt0_i.unsqueeze(1) - normed_coord0.unsqueeze(0)).pow(2), 2)+1e-7)
#dist1 = pdist(kpt1_i,normed_coord1)
dist1 = torch.sqrt(torch.sum((kpt1_i.unsqueeze(1) - normed_coord1.unsqueeze(0)).pow(2), 2)+1e-7)
#print(dist0)
#closest point index
top32_0_idx = torch.argsort(dist0,dim=1)[:,0:32]
top32_1_idx = torch.argsort(dist1,dim=1)[:,0:32]
#print("32 closest neighbor0 idx shape:",top32_0_idx.shape)
#print("32 closest neighbor1 idx shape:",top32_1_idx.shape)
normed0 = torch.cat(self.keypoint_num*[normed_coord0.unsqueeze(0)],dim=0)
normed1 = torch.cat(self.keypoint_num*[normed_coord1.unsqueeze(0)],dim=0)
neighbor0 = normed0[torch.arange(self.keypoint_num)[:,None],top32_0_idx,:]
neighbor1 = normed1[torch.arange(self.keypoint_num)[:,None],top32_1_idx,:]
#print("32 closest neighbor0 shape:",neighbor0.shape)
#print("32 closest neighbor1 shape:",neighbor1.shape)
#distance between (kpt0,kpt0) and distance between(kpt1,kpt1)
#kpt_dist0 = pdist(kpt0_i,kpt0_i)
kpt_dist0 = torch.sqrt(torch.sum((kpt0_i.unsqueeze(1) - kpt0_i.unsqueeze(0)).pow(2), 2)+1e-7)
#kpt_dist1 = pdist(kpt1_i,kpt1_i)
kpt_dist1 = torch.sqrt(torch.sum((kpt1_i.unsqueeze(1) - kpt1_i.unsqueeze(0)).pow(2), 2)+1e-7)
#print("kpt0_distance:",kpt_dist0.shape)
#print("kpt1_distance:",kpt_dist1.shape)
sk0_sk0_dist = torch.sum(F.relu(0.05 - kpt_dist0))/(float(self.keypoint_num)**2)
sk1_sk1_dist = torch.sum(F.relu(0.05 - kpt_dist1))/(float(self.keypoint_num)**2)
#distance between (kpt0,Cpj0) and distance between(kpt,Cpj1)
#kpt_neighbor_dist0 = pdist(kpt0_i,neighbor0)
kpt_neighbor_dist0 = torch.sqrt(torch.sum((kpt0_i.unsqueeze(1) - neighbor0.unsqueeze(0)).pow(2), 2)+1e-7)
#kpt_neighbor_dist1 = pdist(kpt1_i,neighbor1)
kpt_neighbor_dist1 = torch.sqrt(torch.sum((kpt1_i.unsqueeze(1) - neighbor1.unsqueeze(0)).pow(2), 2)+1e-7)
#print("kpt0 to neighbor distance:",kpt_neighbor_dist0.shape)
#print("kpt1 to neighbor distance:",kpt_neighbor_dist1.shape)
sk0_Cpj0 = F.relu(kpt_neighbor_dist0-0.05).sum() / (kpt_neighbor_dist0.shape[0]*kpt_neighbor_dist0.shape[1])
sk1_Cpj1 = F.relu(kpt_neighbor_dist1-0.05).sum() / (kpt_neighbor_dist1.shape[0]*kpt_neighbor_dist1.shape[1])
#unique_pos0 = batch_corr[:,0].unique(dim=0)
#print("num of unique_pos0",batch_corr[:,0].shape)
# all positive in one fragment
pos0 = normed_coord0[batch_corr[:,0]]
#distance of sk0 to pos0
#pos0_skpt0 = pdist(kpt0_i,pos0)
pos0_skpt0 = torch.sqrt(torch.sum((kpt0_i.unsqueeze(1) - pos0.unsqueeze(0)).pow(2), 2)+1e-7)
#print("pos0 to sk0 distance:",pos0_skpt0)
#closest pos0 to sk0
_,pos0_closest = torch.min(pos0_skpt0,dim=1) #t2
#pos0_closest = torch.argsort(dist0,dim=1)[:,0]
#sk0_pos0_dist = sk0[]
#skpt0 closest points
#print("kpt0_i:",kpt0_i,kpt0_i.shape)
pos0_points = pos0[pos0_closest]
#print("pos0_points:",pos0_points,pos0_points.shape)
sk0_pos0_dist = torch.sqrt(torch.sum((kpt0_i - pos0_points)**2,dim=1) + 1e-7)
#torch.sqrt(torch.sum((kpt0_i.unsqueeze(1) - pos0.unsqueeze(0)).pow(2), 2)+1e-7)
#print("closest positive points:",pos0_points.shape)
#find closest point indice in original cloud
idx = torch.all(normed_coord0[:,None,:]==pos0_points[None,:,:],axis=-1).nonzero()
#print("closest point in og cloud:",idx)
_,sort = torch.sort(idx[:,1])
skpt0_idx_pcd0 = idx[:,0][sort]
#print("batch_corr:",batch_corr[:,0])
#print("skpt0_idx_pcd0:",skpt0_idx_pcd0)
#print("skpt0_idx_pcd0[:,None]:",skpt0_idx_pcd0[:,None])
#find sk0_sk1 correspondence
sel = (batch_corr[:,0] == skpt0_idx_pcd0[:,None]).nonzero()[:,1]
sk0_sk1_corr = batch_corr[sel]
#print("sk0_sk1_corr shape:",sk0_sk1_corr.shape)
#find pos1 to sk1 and their distance
pos1_idx = sk0_sk1_corr[:,1]
print("sk0_sk1_corr shape",sk0_sk1_corr.shape)
print(torch.max(pos1_idx))
pos1 = normed_coord1[pos1_idx]
#print("pos1 shape:",pos1.shape)
#pos1_skpt1 = pdist(kpt1_i,pos1)
sk1_pos1_dist = F.pairwise_distance(kpt1_i, pos1, p=2.0, eps=1e-06, keepdim=False)
#pos1_skpt1 = torch.sqrt(torch.sum((kpt1_i.unsqueeze(1) - pos1.unsqueeze(0)).pow(2), 2)+1e-7)
#print("pos1 to sk1 distance:",pos1_skpt1.shape)
#sk1_pos1_dist,_ = torch.min(pos1_skpt1,dim=1)
#append (pos0,sk0) and (pos1,sk1)
print("sk0_pos0_dist shape:",sk0_pos0_dist.shape)
print("sk1_pos1_dist shape:",sk1_pos1_dist.shape)
sk0_pos0 = torch.sum(sk0_pos0_dist)/float(self.keypoint_num)
sk1_pos1 = torch.sum(sk1_pos1_dist)/float(self.keypoint_num)
I have checked that the max of pos1_idx are not out of bound for matrix normed_coord1
Do you have any idea why this happens?