Hello, I’m new to PyTorch and exploring it by working through someone else’s code. While working on multiclass semantic segmentation (9 classes + 1 background), I encountered an error:
IndexError Traceback (most recent call last)
<ipython-input-23-11d5ffca1646> in <module>
8 steps_per_epoch=len(train_loader))
9
---> 10 history = fit(epoch, model, train_loader, val_loader, criterion, optimizer, sched)
<ipython-input-22-bd62e4e7466c> in fit(epochs, model, train_loader, val_loader, criterion, optimizer, scheduler, patch)
38 print(mask.max())
39 print(mask.min())
---> 40 loss = criterion(output, mask)
41 #evaluation metrics
42 iou_score += mIoU(output, mask)
~\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
1045 def forward(self, input: Tensor, target: Tensor) -> Tensor:
1046 assert self.weight is None or isinstance(self.weight, Tensor)
-> 1047 return F.cross_entropy(input, target, weight=self.weight,
1048 ignore_index=self.ignore_index, reduction=self.reduction)
1049
~\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2691 if size_average is not None or reduce is not None:
2692 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2693 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2694
2695
~\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2388 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2389 elif dim == 4:
-> 2390 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2391 else:
2392 # dim == 3 or dim > 4
IndexError: Target 11 is out of bounds.
Here is some information about output and mask.
torch.Size([3, 11, 704, 1056])  # shape of output
torch.Size([3, 704, 1056])      # shape of mask
tensor(11)                      # max value of mask
tensor(1)                       # min value of mask
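If I understand nn.CrossEntropyLoss correctly, the target values have to be class indices in [0, classes - 1], so with 11 output channels anything above 10 fails. Here is a tiny standalone sketch (my own, not part of the original code) that I think reproduces the same error:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
output = torch.randn(1, 11, 4, 4)                        # fake prediction: (batch, classes, H, W)
mask_ok = torch.randint(0, 11, (1, 4, 4))                # valid mask, class indices 0..10
print(criterion(output, mask_ok))                        # works
mask_bad = torch.full((1, 4, 4), 11, dtype=torch.long)   # mask containing the index 11
print(criterion(output, mask_bad))                       # IndexError: Target 11 is out of bounds.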
When I searched online, I found that the values are expected to satisfy (max value of mask = nb_classes - 1), i.e., the class indices in the mask must run from 0 to nb_classes - 1.
torch.Size([3, 11, 704, 1056])
torch.Size([3, 704, 1056])
tensor(10)
tensor(1)
I guess the proper numbers should look like the above. It worked like that for one iteration, but afterwards the values turned into this:
torch.Size([3, 11, 704, 1056])
torch.Size([3, 704, 1056])
tensor(10)
tensor(1)
torch.Size([3, 11, 704, 1056])
torch.Size([3, 704, 1056])
tensor(11)
tensor(0)
And I got the same error.
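To see where these values come from, I guess one could list all the unique labels in the training masks instead of just min/max, something like this (my own sketch, assuming train_loader yields (image, mask) pairs as above):

all_labels = set()
for _, mask_tiles in train_loader:
    all_labels |= set(torch.unique(mask_tiles).tolist())
print(sorted(all_labels))   # with classes=11 I would expect only values in 0..10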
Here is the training function.
def fit(epochs, model, train_loader, val_loader, criterion, optimizer, scheduler, patch=False):
    torch.cuda.empty_cache()
    train_losses = []
    test_losses = []
    val_iou = []; val_acc = []
    train_iou = []; train_acc = []
    lrs = []
    min_loss = np.inf
    decrease = 1; not_improve = 0

    model.to(device)
    fit_time = time.time()
    for e in range(epochs):
        since = time.time()
        running_loss = 0
        iou_score = 0
        accuracy = 0
        # training loop
        model.train()
        for i, data in enumerate(tqdm(train_loader)):
            # training phase
            image_tiles, mask_tiles = data
            if patch:
                bs, n_tiles, c, h, w = image_tiles.size()
                image_tiles = image_tiles.view(-1, c, h, w)
                mask_tiles = mask_tiles.view(-1, h, w)

            image = image_tiles.to(device); mask = mask_tiles.to(device)
            # forward
            output = model(image)
            print(output.shape)
            print(mask.shape)
            print(mask.max())
            print(mask.min())
            loss = criterion(output, mask)
            # evaluation metrics
            iou_score += mIoU(output, mask)
            accuracy += pixel_accuracy(output, mask)
            # backward
            loss.backward()
            optimizer.step()       # update weights
            optimizer.zero_grad()  # reset gradients
            # step the learning rate
            lrs.append(get_lr(optimizer))
            scheduler.step()
            running_loss += loss.item()
        else:
            model.eval()
            test_loss = 0
            test_accuracy = 0
            val_iou_score = 0
            # validation loop
            with torch.no_grad():
                for i, data in enumerate(tqdm(val_loader)):
                    # reshape to 9 patches from a single image, drop the batch dimension
                    image_tiles, mask_tiles = data
                    if patch:
                        bs, n_tiles, c, h, w = image_tiles.size()
                        image_tiles = image_tiles.view(-1, c, h, w)
                        mask_tiles = mask_tiles.view(-1, h, w)

                    image = image_tiles.to(device); mask = mask_tiles.to(device)
                    output = model(image)
                    # evaluation metrics
                    val_iou_score += mIoU(output, mask)
                    test_accuracy += pixel_accuracy(output, mask)
                    # loss
                    loss = criterion(output, mask)
                    test_loss += loss.item()

            # calculate the mean for each epoch
            train_losses.append(running_loss/len(train_loader))
            test_losses.append(test_loss/len(val_loader))

            if min_loss > (test_loss/len(val_loader)):
                print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, (test_loss/len(val_loader))))
                min_loss = (test_loss/len(val_loader))
                decrease += 1
                if decrease % 5 == 0:
                    print('saving model...')
                    torch.save(model, 'Unet-Mobilenet_v2_mIoU-{:.3f}.pt'.format(val_iou_score/len(val_loader)))

            if (test_loss/len(val_loader)) > min_loss:
                not_improve += 1
                min_loss = (test_loss/len(val_loader))
                print(f'Loss Not Decrease for {not_improve} time')
                if not_improve == 7:
                    print('Loss not decrease for 7 times, Stop Training')
                    break

            # iou
            val_iou.append(val_iou_score/len(val_loader))
            train_iou.append(iou_score/len(train_loader))
            train_acc.append(accuracy/len(train_loader))
            val_acc.append(test_accuracy/len(val_loader))
            print("Epoch:{}/{}..".format(e+1, epochs),
                  "Train Loss: {:.3f}..".format(running_loss/len(train_loader)),
                  "Val Loss: {:.3f}..".format(test_loss/len(val_loader)),
                  "Train mIoU:{:.3f}..".format(iou_score/len(train_loader)),
                  "Val mIoU: {:.3f}..".format(val_iou_score/len(val_loader)),
                  "Train Acc:{:.3f}..".format(accuracy/len(train_loader)),
                  "Val Acc:{:.3f}..".format(test_accuracy/len(val_loader)),
                  "Time: {:.2f}m".format((time.time()-since)/60))

    history = {'train_loss': train_losses, 'val_loss': test_losses,
               'train_miou': train_iou, 'val_miou': val_iou,
               'train_acc': train_acc, 'val_acc': val_acc,
               'lrs': lrs}
    print('Total time: {:.2f} m'.format((time.time()-fit_time)/60))
    return history
And here is the model.
model = smp.Unet('mobilenet_v2', encoder_weights='imagenet', classes=11, activation=None, encoder_depth=5, decoder_channels=[256, 128, 64, 32, 16])
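As far as I understand, classes=11 means the model outputs 11 channels, so the mask may only contain the indices 0..10. A quick guard before the loss call (my own addition, not in the original code) would catch the bad batch earlier:

n_classes = 11
assert mask.min() >= 0 and mask.max() < n_classes, \
    f"mask values {mask.min().item()}..{mask.max().item()} outside 0..{n_classes - 1}"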
I have no idea why the min/max values of the mask shift from 1/10 to 0/11 after the first iteration.
I’m open to any kind of suggestions about the code. Thanks for your time.