I’m using a 3D semantic segmentation loss as follows
from torch.autograd import Variable
import torch
import torch.nn.functional as F
def cross_entropy3d(pred, target, weight=None, size_average=True):
# input: (n, c, h, w), target: (n, h, w)
n, c, h, w , z = pred.size() # Get number of classes 'c'
log_pred = F.log_softmax(pred, dim=1)
log_pred = log_pred.permute(0, 4, 3, 2, 1).contiguous().view(-1, c) # Make channels (classes) the last dimension
log_pred = log_pred[target.view(-1, 1).repeat(1, c) < 255] # Extract only predictions with label < 255
log_pred = log_pred.view(-1, c) # Reshape for loss calculation (outputs [N x c] where c is number of classes and N = n * h * w * z
mask = target < 255
target = target[mask] # Extract only predictions with label < 255
loss = F.nll_loss(log_pred, target.view(-1), weight=weight, size_average=False)
if size_average:
loss /= mask.data.sum()
return loss
Now I need to subsample pred and target because of weight inbalance.
So I select all target variables with label > 0 and a random subsample of those where target == 0.
def cross_entropy3d_with_sampling(pred, target, weight=None, size_average=True):
# input: (n, c, h, w), target: (n, h, w)
n, c, h, w, z = pred.size()
log_pred = F.log_softmax(pred, dim=1)
log_pred = log_pred.permute(0, 4, 3, 2, 1).contiguous().view(-1, c)
log_pred = log_pred[target.view(-1, 1).repeat(1, c) < 255]
log_pred = log_pred.view(-1, c)
mask = target < 255
target = target[mask]
### Randomly subsample to make it contain N occupied voxels and 2N randomly sampled occluded voxels
occ_idx = target > 0 # Occupied voxels
empty_idx = (target == 0) # Empty voxels
pred_occ = log_pred[occ_idx.repeat(c, 1)] # Select occupied voxels
pred_occ = pred_occ.view(-1, c)
target_occ = target[occ_idx]
pred_empty = log_pred[empty_idx.repeat(c, 1)] # Select empty voxels
pred_empty = pred_empty.view(-1, c)
target_empty = target[empty_idx]
idx = torch.randperm(target_empty.shape[0]) # Randomly subsample empty voxels
idx = Variable(idx).cuda() # idx needs to be a torch.cuda Variable in order to be used in `index_select`
pred_empty = pred_empty.index_select(0, idx[0:sample_size]) # Select only sample_size voxels at random
target_empty = target_empty.index_select(0, idx[0:sample_size])
target_full = torch.cat([target_occ, target_empty], 0) # Concatenate occupied and randomly sampled empty voxels
log_pred_full = torch.cat([pred_occ, pred_empty], 0)
###
loss = F.nll_loss(log_pred_full, target_full.view(-1), weight=weight, size_average=False)
if size_average:
loss /= target_full.shape[0]
return loss
Weirdly the function above works fine until I call loss.backward()
. So there might be a problem with the derivation due to my sampling. Here is the error message I’m getting:
File "/home/myUser/anaconda2/envs/py27_source/lib/python2.7/site-packages/torch/autograd/variable.py", line 367, in masked_scatter
return self.clone().masked_scatter_(mask, variable)
RuntimeError: The expanded size of the tensor (12) must match the existing size (259764) at non-singleton dimension 1
It claims that my dimensions were wrong. However I checked and the dimensions are exactly the same as for the working function cross_entropy3d
.
Do I make some obvious mistake?
Maybe the usage of torch.randperm
for randomly selecting voxels is causing the trouble with the derivation that is needed for loss.backward()
?