Hello,
My partner and I are working on an object detection (OD) project from Kaggle, the Stanford Dogs Dataset, which has images for 120 breeds (classes) and one box annotation per image.
We used the PyTorch OD tutorial as a reference (although we have only one box per image and don't use masks) and reached the point where training runs, but only with batch sizes of 1, 2, and 4. Whenever we raise the batch size above 4, we get an index error (IndexError: list index out of range).
What can we do to resolve this issue?
For simplicity, the code below excludes the imports and the steps for creating the following:
- train_paths, eval_paths, test_paths: lists of paths to the image files for the training, evaluation, and test sets respectively
- annotations_paths: the path to the annotations directory
- breed_dict: a dictionary whose keys are the class (breed) names and whose values are integers from 0 to 119, one per breed
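For reference, here is a minimal sketch of how these could be built; the directory paths and the 80/10/10 split below are made up for illustration, and our real setup differs:

import os
import random

# Hypothetical locations -- our actual paths differ
images_dir = "/home/user/stanford-dogs/Images/"
annotations_paths = "/home/user/stanford-dogs/Annotation/"

# One sub-directory per breed, e.g. 'n02106550-Rottweiler'
breed_names = sorted(os.listdir(images_dir))
# Map each breed name to an integer label in 0..119
breed_dict = {breed: idx for idx, breed in enumerate(breed_names)}

# Collect all image paths and split them (illustrative 80/10/10 split)
all_paths = [os.path.join(images_dir, breed, fname)
             for breed in breed_names
             for fname in os.listdir(os.path.join(images_dir, breed))]
random.shuffle(all_paths)
n = len(all_paths)
train_paths = all_paths[:int(0.8 * n)]
eval_paths = all_paths[int(0.8 * n):int(0.9 * n)]
test_paths = all_paths[int(0.9 * n):]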
The code:
class DogsDS(Dataset):
    def __init__(self, image_paths, annot_dir, breed_dict, transform=None):
        """
        Builds a list of the images and a dict with three keys: labels, boxes, and sizes.
        """
        self.paths = image_paths
        self.annot_dir = annot_dir
        self.breed_dict = breed_dict
        self.transform = transform
        # Here we store the data segments (image, annotation info)
        self.images = []
        self.annot_dict = {}
        # Iterate over all classes and images and collect into lists
        label_indices = []
        self.boxes = []
        size_list = []
        for path in tqdm(self.paths):
            # Split the path by "/"
            split_path = path.split("/")
            # We get something like: ['home', ..., 'Images', 'n02106550-Rottweiler', 'n02106550_5710.jpg']
            # Get the label index
            breed = split_path[-2]
            label_idx = self.breed_dict[breed]
            label_indices.append(label_idx)
            img_image = Image.open(path)
            # If we don't close the file we get a "Too many open files" exception;
            # if we close it without copying first, we can't use the image later
            self.images.append(img_image.copy())
            img_image.close()
            # Now the box
            img_file_name = split_path[-1]
            annot_name = img_file_name.split(".")[0]
            annot_path = self.annot_dir + breed + "/" + annot_name
            # Extract the box attributes from the annotation file
            tree = ET.parse(annot_path)       # parse the XML annotation file
            root = tree.getroot()             # root element of the XML tree
            objects = root.findall('object')  # all annotated dogs in the image
            for o in objects:
                bndbox = o.find('bndbox')     # bounding-box coordinates
                # In COCO format, bbox = [xmin, ymin, width, height];
                # PyTorch expects [xmin, ymin, xmax, ymax]
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                box = (xmin, ymin, xmax, ymax)
                self.boxes.append(box)
            # Now the size
            sizes = root.findall('size')
            for size in sizes:
                width = int(size.find('width').text)
                height = int(size.find('height').text)
                size_list.append((width, height))
        self.annot_dict["boxes"] = self.boxes
        self.annot_dict["labels"] = label_indices
        self.annot_dict["sizes"] = size_list
        # Convert to tensors
        self.boxes = torch.as_tensor(self.boxes, dtype=torch.float32)
        self.annot_dict["labels"] = torch.as_tensor(self.annot_dict["labels"], dtype=torch.int64)
    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return len(self.images)

    def __getitem__(self, index):
        """
        Get a sample from the dataset.
        input: index
        returns: a tuple of (image, target), where target is a dict holding the label (index),
        box (tensor), image_id, area, and iscrowd
        """
        # The dataset is preloaded, so just look the image up
        image = self.images[index]
        # Number of objects in the image. We have only 1 in all images. For OD with more than
        # one object per image, we would need to iterate here as in the PyTorch example
        num_objs = 1
        out_boxes = self.boxes[index]
        # labels is already a tensor
        labels = self.annot_dict["labels"][index]
        # Tensorise the image id
        image_id = torch.tensor([index])
        # Area (computed here from the image size, width * height)
        box_size = self.annot_dict["sizes"][index][0] * self.annot_dict["sizes"][index][1]
        area = torch.as_tensor((box_size,), dtype=torch.float32)
        # iscrowd is a binary flag indicating (when there are several objects) whether the
        # annotation covers a crowd (1) or a single object (0). In our case it is always 0
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        # May use a transform function to transform samples,
        # e.g., random crop, whitening
        if self.transform is not None:
            image = self.transform(image)
        # return the image and the target dict
        return image, {"labels": labels,
                       "boxes": out_boxes,
                       "image_id": image_id,
                       "area": area,
                       "iscrowd": iscrowd}
# Must at least transform to tensor
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
])

train_ds = DogsDS(train_paths, annotations_paths, breed_dict, transform)
eval_ds = DogsDS(eval_paths, annotations_paths, breed_dict, transform)
test_ds = DogsDS(test_paths, annotations_paths, breed_dict, transform)

# Instantiate data loaders
train_loader = DataLoader(train_ds, batch_size=8, num_workers=0, shuffle=True)
eval_loader = DataLoader(eval_ds, batch_size=1, num_workers=0, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=1, num_workers=0, shuffle=True)
# Use GPU if available, otherwise stick with cpu
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model_instance_segmentation(num_classes):
    # load a detection model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    print("in_features", in_features)
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
# Need to add one class for the background
num_classes = len(breed_dict) + 1
model = get_model_instance_segmentation(num_classes)
# move model to the right device
model.to(device)
# parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 1
for epoch in range(num_epochs):
    model.train()
    i = 0
    for imgs, annotations in train_loader:
        i += 1
        print(i)
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in annotations.items()} for annot in annotations]
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        print(f'Iteration: {i}/{len(train_loader)}, Loss: {losses}')
The error we get for a batch size of 8, for example, is:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-98-b426c8e48613> in <module>
33 # loss_dict = model(imgs, annot_list)
34
---> 35 loss_dict = model(imgs, annotations)
36 losses = sum(loss for loss in loss_dict.values())
37
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
/opt/anaconda3/lib/python3.7/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
45 raise ValueError("In training mode, targets should be passed")
46 original_image_sizes = [img.shape[-2:] for img in images]
---> 47 images, targets = self.transform(images, targets)
48 features = self.backbone(images.tensors)
49 if isinstance(features, torch.Tensor):
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
/opt/anaconda3/lib/python3.7/site-packages/torchvision/models/detection/transform.py in forward(self, images, targets)
34 for i in range(len(images)):
35 image = images[i]
---> 36 target = targets[i] if targets is not None else targets
37 if image.dim() != 3:
38 raise ValueError("images is expected to be a list of 3d tensors "
IndexError: list index out of range