NaN loss in the pretrained Faster R-CNN

I am following the object detection fine-tuning tutorial from the PyTorch tutorials, and the loss becomes NaN during training.

This is my code:

        anno={'image':image,'bboxes':[box],'id':[1]}
        aug=self.transform(**anno)

        image=torch.from_numpy(aug['image']).permute(2,0,1).float()
        box=torch.as_tensor(np.array(aug['bboxes']),dtype=torch.float32)

        labels=torch.ones(1,dtype=torch.int64)
        image_id=torch.tensor([index])
        area=(box[:,3]-box[:,1])*(box[:,2]-box[:,0])
        iscrowd=torch.zeros(1,dtype=torch.uint8)
        target={}
        target['boxes']=box
        target['labels']=labels
        target['image_id']=image_id
        target['area']=area
        target['iscrowd']=iscrowd
        print(box)
        return image,target

from torchvision.models.detection.faster_rcnn import  FastRCNNPredictor
# Build the Faster R-CNN (ResNet-50 FPN) detector with pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Width of the features feeding the existing classification layer; the
# replacement head must accept the same number of inputs.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Number of outputs for the new box predictor.
num_class = 2

# Replace the box predictor with a fresh head sized for ``num_class`` outputs.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_class)
print(model)

num_epoch = 20

# Move the model to its device *before* creating the optimizer: the
# torch.optim documentation advises constructing optimizers only after the
# model has been moved, so the optimizer is built from the final parameters.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimize only the parameters that still require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps (one step per epoch below).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

for epoch in range(num_epoch):
    # NOTE(review): train_one_epoch / evaluate are defined outside this snippet
    # (presumably the tutorial's helper functions) -- confirm their contracts.
    train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=10)
    lr_scheduler.step()
    evaluate(model, valid_dl, device)

# Persist only the learned weights. ``nn.Module`` has no ``save`` method, so
# the state dict has to be serialized through ``torch.save``.
model_path = path / 'box.pt'
torch.save(model.state_dict(), model_path)

Basically, every step follows the tutorial!

While looking for a solution, I read that the cause might be a box whose xmin > xmax.

So I printed the boxes, but none of them had that problem!

Here’s what’s wrong:

What should I do?

Could you post some dummy inputs to reproduce this issue or are you using the same dataset as described in the tutorial?

I used a dataset from a Kaggle competition; this is the link to the competition: kaggle

The dataset above is meant for image classification; this second link provides the bounding-box annotations for part of the images: bbox

The following is the code I wrote for the dataset; part of it already appears in my post above:

# Locations of the training images and of the annotation file.
data_dir = path / 'fish' / 'train' / 'train'
bbox_file = path / 'cropping.txt'
#%%
# Training pipeline: horizontal/vertical flips and Rotate(45), followed by a
# resize to 128x128. Boxes are passed as pascal_voc (x_min, y_min, x_max,
# y_max) and their labels travel in the 'id' field.
train_aug = A.Compose(
    [
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.Rotate(45),
        A.Resize(128, 128),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['id']),
)

# Validation pipeline: only the 128x128 resize, same box conventions.
valid_aug = A.Compose(
    [
        A.Resize(128, 128),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['id']),
)
#%%
class myDataset(Data.Dataset):
    """Single-class detection dataset built from a point-annotation file.

    Every record pairs an image file name with a list of ``(x, y)`` points;
    the axis-aligned bounding box of those points is the detection target.
    Call ``prepare()`` to load the annotations before indexing the dataset.
    """

    def __init__(self, transform):
        """Store the transform applied to every sample.

        ``transform`` is called as ``transform(image=..., bboxes=..., id=...)``
        and must return a mapping with ``'image'`` and ``'bboxes'`` entries.
        """
        super().__init__()
        self.transform = transform
        # Start empty so len() reports 0 (instead of raising) before prepare().
        self.bbox_info = []

    def prepare(self):
        """Parse ``bbox_file`` into ``self.bbox_info``.

        Each non-blank line has the form ``name,x0,y0,x1,y1,...`` and is
        stored as ``(name, [(x0, y0), (x1, y1), ...])``.

        Raises:
            IndexError: if a line holds an odd number of coordinates.
            ValueError: if a coordinate is not an integer.
        """
        bbox_info = []
        with open(bbox_file, 'r') as f:
            for line in f:
                line = line.strip()
                # Blank (e.g. trailing) lines would yield a record with an
                # empty name and no points, which fails later in get_box().
                if not line:
                    continue
                name, *coord = line.split(',')
                points = [(int(coord[i]), int(coord[i + 1])) for i in range(0, len(coord), 2)]
                bbox_info.append((name, points))
        self.bbox_info = bbox_info

    def get_box(self, list):
        """Return the bounding box ``(x0, y0, x1, y1)`` enclosing the points.

        ``list`` is a non-empty sequence of ``(x, y)`` pairs. The parameter
        name shadows the builtin; it is kept so keyword callers keep working.

        Raises:
            IndexError: if no point is given.
        """
        points = tuple(list)
        if not points:
            raise IndexError('get_box() needs at least one (x, y) point')
        xs, ys = zip(*points)
        return min(xs), min(ys), max(xs), max(ys)

    def __getitem__(self, index):
        """Return ``(image, target)`` for the sample at ``index``.

        ``image`` is a float tensor in channel-first layout. ``target`` holds
        ``boxes`` (N x 4, float32), ``labels`` (all ones), ``image_id``,
        ``area`` and ``iscrowd``; N is 0 when the transform dropped the box
        or left it degenerate.
        """
        image_name, coord = self.bbox_info[index]
        box = self.get_box(coord)
        image_path = data_dir / image_name
        image = np.array(I.open(image_path).convert('RGB'))

        aug = self.transform(image=image, bboxes=[box], id=[1])

        pixels = aug['image']
        image = torch.from_numpy(pixels).permute(2, 0, 1).float()
        # torchvision's detection models document a 0-1 input range; feeding
        # raw 0-255 values inflates activations and can drive the loss to NaN.
        # Only rescale 8-bit data so an already-normalized image is untouched.
        if pixels.dtype == np.uint8:
            image = image / 255.0

        # reshape keeps the (N, 4) layout even when the transform removed the
        # box, in which case np.array([]) would otherwise be one-dimensional.
        box = torch.as_tensor(np.array(aug['bboxes']), dtype=torch.float32).reshape(-1, 4)
        # Drop boxes without positive width and height: degenerate boxes are
        # another known source of NaN/inf regression losses.
        keep = (box[:, 2] > box[:, 0]) & (box[:, 3] > box[:, 1])
        box = box[keep]

        # Size the per-box fields from the boxes that actually survived.
        num_boxes = box.shape[0]
        target = {
            'boxes': box,
            'labels': torch.ones(num_boxes, dtype=torch.int64),
            'image_id': torch.tensor([index]),
            'area': (box[:, 3] - box[:, 1]) * (box[:, 2] - box[:, 0]),
            'iscrowd': torch.zeros(num_boxes, dtype=torch.uint8),
        }
        # Debug output kept from the original code.
        print(box)
        return image, target

    def __len__(self):
        """Return the number of annotation records loaded by ``prepare()``."""
        return len(self.bbox_info)

# This is the dataset used for training.
train_ds = myDataset(train_aug)
# The annotations are loaded by prepare(), not by the constructor.
train_ds.prepare()

The model and training code are the ones shown in my post above.